xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f1af5d2ffeae1f5fc391a89939f4818e47770ae3)
1 /*$Id: mpiaij.c,v 1.306 1999/10/24 14:02:16 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 extern int MatSetUpMultiply_MPIAIJ(Mat);
8 extern int DisAssemble_MPIAIJ(Mat);
9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12 extern int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order-N integer array, but access is fast).
20 */
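/*
  In both cases the local index is stored shifted by one (i+1), so that a lookup
  yielding zero means the global column is not present in the off-diagonal block;
  callers subtract one and test for a negative result.
*/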
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
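/*
  The two macros below are hand-inlined versions of MatSetValues_SeqAIJ() for a
  single (row,col,value) triple: the _A_ variant updates the diagonal block aij->A
  and the _B_ variant the off-diagonal block aij->B.  Each searches the row for the
  column, adds or inserts the value according to addv, and, when the row has no free
  slot left, reallocates the CSR storage with CHUNKSIZE extra entries for that row.
  They rely on the local variables (rp, ap, nrow, rmax, nonew, shift, ...) declared
  in MatSetValues_MPIAIJ() below.
*/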
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for ( _i=0; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = 1; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for ( ii=N; ii>=_i; ii-- ) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
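/*
  MatSetValues for the parallel AIJ format.  Rows owned by this process (global
  indices in [rstart,rend)) are inserted directly: columns in [cstart,cend) go into
  the diagonal block aij->A, all other columns go into the off-diagonal block aij->B
  (translated to local indices through colmap/garray once the matrix has been
  assembled).  Entries for rows owned by other processes are collected in the stash
  and exchanged during MatAssemblyBegin()/MatAssemblyEnd().
*/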
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
198   Scalar     value;
199   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
200   int        cstart = aij->cstart, cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros below */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
206   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208 
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
211   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for ( i=0; i<m; i++ ) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for ( j=0; j<n; j++ ) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233         else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ *) B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (PETSC_USE_CTABLE)
301           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr,nstash,reallocs;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
340   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
342   PetscFunctionReturn(0);
343 }
344 
345 
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled, if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if the nonzero structure of the submatrix B cannot change, then we know
382      that no processor disassembled and we can skip this step
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {
398     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
399     aij->rowvalues = 0;
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
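  /*
     The rows listed in "is" may belong to any process.  Below, the owner of each
     requested row is determined from the ownership ranges, an MPI_Allreduce with
     the PetscMaxSum_Op operation tells every process how many messages it will
     receive and the maximum message length, and the row indices are then sent
     point-to-point so that each process ends up with the list of its own rows to
     zero (lrows).
  */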
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
455   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
456   nrecvs = work[size+rank];
457   nmax   = work[rank];
458   ierr   = PetscFree(work);CHKERRQ(ierr);
459 
460   /* post receives:   */
461   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
462   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
463   for ( i=0; i<nrecvs; i++ ) {
464     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
465   }
466 
467   /* do sends:
468       1) starts[i] gives the starting index in svalues for stuff going to
469          the ith processor
470   */
471   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
472   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
473   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
474   starts[0] = 0;
475   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
476   for ( i=0; i<N; i++ ) {
477     svalues[starts[owner[i]]++] = rows[i];
478   }
479   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
480 
481   starts[0] = 0;
482   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
483   count = 0;
484   for ( i=0; i<size; i++ ) {
485     if (procs[i]) {
486       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
487     }
488   }
489   ierr = PetscFree(starts);CHKERRQ(ierr);
490 
491   base = owners[rank];
492 
493   /*  wait on receives */
494   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
495   source = lens + nrecvs;
496   count  = nrecvs; slen = 0;
497   while (count) {
498     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
499     /* unpack receives into our local space */
500     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
501     source[imdex]  = recv_status.MPI_SOURCE;
502     lens[imdex]    = n;
503     slen          += n;
504     count--;
505   }
506   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
507 
508   /* move the data into the send scatter */
509   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
510   count = 0;
511   for ( i=0; i<nrecvs; i++ ) {
512     values = rvalues + i*nmax;
513     for ( j=0; j<lens[i]; j++ ) {
514       lrows[count++] = values[j] - base;
515     }
516   }
517   ierr = PetscFree(rvalues);CHKERRQ(ierr);
518   ierr = PetscFree(lens);CHKERRQ(ierr);
519   ierr = PetscFree(owner);CHKERRQ(ierr);
520   ierr = PetscFree(nprocs);CHKERRQ(ierr);
521 
522   /* actually zap the local rows */
523   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
524   PLogObjectParent(A,istmp);
525 
526   /*
527         Zero the required rows. If the "diagonal block" of the matrix
528      is square and the user wishes to set the diagonal, we use separate
529      code so that MatSetValues() is not called for each diagonal entry, which
530      would allocate new memory and slow things down with many mallocs.
531 
532        Contributed by: Mathew Knepley
533   */
534   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
535   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
536   if (diag && (l->A->M == l->A->N)) {
537     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
538   } else if (diag) {
539     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
540     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
541       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
542 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
543     }
544     for ( i = 0; i < slen; i++ ) {
545       row  = lrows[i] + rstart;
546       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
547     }
548     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550   } else {
551     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
552   }
553   ierr = ISDestroy(istmp);CHKERRQ(ierr);
554   ierr = PetscFree(lrows);CHKERRQ(ierr);
555 
556   /* wait on sends */
557   if (nsends) {
558     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
559     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
560     ierr        = PetscFree(send_status);CHKERRQ(ierr);
561   }
562   ierr = PetscFree(send_waits);CHKERRQ(ierr);
563   ierr = PetscFree(svalues);CHKERRQ(ierr);
564 
565   PetscFunctionReturn(0);
566 }
567 
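/*
  y = A*x for the parallel format: the scatter of the needed off-process (ghost)
  values of x into a->lvec is started, the diagonal block a->A is applied to the
  local part of x while the messages are in transit, and once the scatter completes
  the off-diagonal block a->B is applied to the ghost values and added into y.
*/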
568 #undef __FUNC__
569 #define __FUNC__ "MatMult_MPIAIJ"
570 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
571 {
572   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
573   int        ierr,nt;
574 
575   PetscFunctionBegin;
576   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
577   if (nt != a->n) {
578     SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt);
579   }
580   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
581   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
582   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNC__
588 #define __FUNC__ "MatMultAdd_MPIAIJ"
589 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
590 {
591   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
592   int        ierr;
593 
594   PetscFunctionBegin;
595   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
596   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
597   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 
602 #undef __FUNC__
603 #define __FUNC__ "MatMultTrans_MPIAIJ"
604 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
605 {
606   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
607   int        ierr;
608 
609   PetscFunctionBegin;
610   /* do nondiagonal part */
611   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
612   /* send it on its way */
613   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
614   /* do local part */
615   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
616   /* receive remote parts: note this assumes the values are not actually */
617   /* inserted in yy until the next line, which is true for my implementation*/
618   /* but may not always be true. */
619   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNC__
624 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
625 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
626 {
627   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
628   int        ierr;
629 
630   PetscFunctionBegin;
631   /* do nondiagonal part */
632   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
633   /* send it on its way */
634   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635   /* do local part */
636   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
637   /* receive remote parts: note this assumes the values are not actually */
638   /* inserted in zz until the next line, which is true for my implementation */
639   /* but may not always be true. */
640   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 /*
645   This only works correctly for square matrices where the subblock A->A is the
646    diagonal block
647 */
648 #undef __FUNC__
649 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
650 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
651 {
652   int        ierr;
653   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
654 
655   PetscFunctionBegin;
656   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
657   if (a->rstart != a->cstart || a->rend != a->cend) {
658     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
659   }
660   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNC__
665 #define __FUNC__ "MatScale_MPIAIJ"
666 int MatScale_MPIAIJ(Scalar *aa,Mat A)
667 {
668   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
669   int        ierr;
670 
671   PetscFunctionBegin;
672   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
673   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
674   PetscFunctionReturn(0);
675 }
676 
677 #undef __FUNC__
678 #define __FUNC__ "MatDestroy_MPIAIJ"
679 int MatDestroy_MPIAIJ(Mat mat)
680 {
681   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
682   int        ierr;
683 
684   PetscFunctionBegin;
685 
686   if (mat->mapping) {
687     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
688   }
689   if (mat->bmapping) {
690     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
691   }
692   if (mat->rmap) {
693     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
694   }
695   if (mat->cmap) {
696     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
697   }
698 #if defined(PETSC_USE_LOG)
699   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
700 #endif
701   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
702   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
703   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
704   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
705 #if defined (PETSC_USE_CTABLE)
706   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
707 #else
708   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
709 #endif
710   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
711   if (aij->lvec)   VecDestroy(aij->lvec);
712   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
713   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
714   ierr = PetscFree(aij);CHKERRQ(ierr);
715   PLogObjectDestroy(mat);
716   PetscHeaderDestroy(mat);
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNC__
721 #define __FUNC__ "MatView_MPIAIJ_Binary"
722 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
723 {
724   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
725   int         ierr;
726 
727   PetscFunctionBegin;
728   if (aij->size == 1) {
729     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
730   }
731   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
732   PetscFunctionReturn(0);
733 }
734 
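/*
  For the ASCII info formats only per-process statistics are printed.  For a full
  view on more than one process, the entire matrix is first assembled onto process
  0 (by creating a new MPIAIJ matrix whose rows all live on rank 0 and re-inserting
  every local entry with global column numbers) and then viewed through the
  singleton viewer.
*/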
735 #undef __FUNC__
736 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
737 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
738 {
739   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
740   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
741   int         ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size;
742   PetscTruth  isdraw,isascii,flg;
743 
744   PetscFunctionBegin;
745   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
746   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
747   if (isascii) {
748     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
749     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
750       MatInfo info;
751       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
752       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
753       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
754       if (flg) {
755         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
756 					      rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
757       } else {
758         ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
759 		    rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
760       }
761       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
762       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
763       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
764       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
765       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
766       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
767       PetscFunctionReturn(0);
768     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
769       PetscFunctionReturn(0);
770     }
771   } else if (isdraw) {
772     Draw       draw;
773     PetscTruth isnull;
774     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
775     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
776   }
777 
778   if (size == 1) {
779     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
780   } else {
781     /* assemble the entire matrix onto first processor. */
782     Mat         A;
783     Mat_SeqAIJ *Aloc;
784     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
785     Scalar      *a;
786 
787     if (!rank) {
788       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
789     } else {
790       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
791     }
792     PLogObjectParent(mat,A);
793 
794     /* copy over the A part */
795     Aloc = (Mat_SeqAIJ*) aij->A->data;
796     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
797     row = aij->rstart;
798     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
799     for ( i=0; i<m; i++ ) {
800       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
801       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
802     }
803     aj = Aloc->j;
804     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
805 
806     /* copy over the B part */
807     Aloc = (Mat_SeqAIJ*) aij->B->data;
808     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
809     row = aij->rstart;
810     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
811     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
812     for ( i=0; i<m; i++ ) {
813       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
814       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
815     }
816     ierr = PetscFree(ct);CHKERRQ(ierr);
817     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     /*
820        Everyone has to call to draw the matrix since the graphics waits are
821        synchronized across all processors that share the Draw object
822     */
823     if (!rank) {
824       Viewer sviewer;
825       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
826       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
827       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
828     }
829     ierr = MatDestroy(A);CHKERRQ(ierr);
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNC__
835 #define __FUNC__ "MatView_MPIAIJ"
836 int MatView_MPIAIJ(Mat mat,Viewer viewer)
837 {
838   int        ierr;
839   PetscTruth isascii,isdraw,issocket,isbinary;
840 
841   PetscFunctionBegin;
842   ierr  = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
843   ierr  = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
844   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
845   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
846   if (isascii || isdraw || isbinary || issocket) {
847     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
848   } else {
849     SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 /*
855     This has to provide several versions.
856 
857      2) a) use only local smoothing updating outer values only once.
858         b) local smoothing updating outer values each inner iteration
859      3) color updating outer values between colors.
860 */
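/*
  Only the local variants are implemented below: the ghost values of xx are
  gathered into mat->lvec once per call and held fixed while the requested number
  of local SOR sweeps is performed using the diagonal block A and the off-diagonal
  block B; true parallel SOR is refused with an error.
*/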
861 #undef __FUNC__
862 #define __FUNC__ "MatRelax_MPIAIJ"
863 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
864                            double fshift,int its,Vec xx)
865 {
866   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
867   Mat        AA = mat->A, BB = mat->B;
868   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
869   Scalar     *b,*x,*xs,*ls,d,*v,sum;
870   int        ierr,*idx, *diag;
871   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
872 
873   PetscFunctionBegin;
874   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
875   diag = A->diag;
876   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
877     if (flag & SOR_ZERO_INITIAL_GUESS) {
878       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     }
881     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
882     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
883     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
884     if (xx != bb) {
885       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
886     } else {
887       b = x;
888     }
889     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
890     xs = x + shift; /* shift by one for index start of 1 */
891     ls = ls + shift;
892     while (its--) {
893       /* go down through the rows */
894       for ( i=0; i<m; i++ ) {
895         n    = A->i[i+1] - A->i[i];
896         PLogFlops(4*n+3);
897         idx  = A->j + A->i[i] + shift;
898         v    = A->a + A->i[i] + shift;
899         sum  = b[i];
900         SPARSEDENSEMDOT(sum,xs,v,idx,n);
901         d    = fshift + A->a[diag[i]+shift];
902         n    = B->i[i+1] - B->i[i];
903         idx  = B->j + B->i[i] + shift;
904         v    = B->a + B->i[i] + shift;
905         SPARSEDENSEMDOT(sum,ls,v,idx,n);
906         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
907       }
908       /* come up through the rows */
909       for ( i=m-1; i>-1; i-- ) {
910         n    = A->i[i+1] - A->i[i];
911         PLogFlops(4*n+3);
912         idx  = A->j + A->i[i] + shift;
913         v    = A->a + A->i[i] + shift;
914         sum  = b[i];
915         SPARSEDENSEMDOT(sum,xs,v,idx,n);
916         d    = fshift + A->a[diag[i]+shift];
917         n    = B->i[i+1] - B->i[i];
918         idx  = B->j + B->i[i] + shift;
919         v    = B->a + B->i[i] + shift;
920         SPARSEDENSEMDOT(sum,ls,v,idx,n);
921         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
922       }
923     }
924     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
926     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
927   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
928     if (flag & SOR_ZERO_INITIAL_GUESS) {
929       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
930       PetscFunctionReturn(0);
931     }
932     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
933     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
934     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
935     if (xx != bb) {
936       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
937     } else {
938       b = x;
939     }
940     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
941     xs = x + shift; /* shift by one for index start of 1 */
942     ls = ls + shift;
943     while (its--) {
944       for ( i=0; i<m; i++ ) {
945         n    = A->i[i+1] - A->i[i];
946         PLogFlops(4*n+3);
947         idx  = A->j + A->i[i] + shift;
948         v    = A->a + A->i[i] + shift;
949         sum  = b[i];
950         SPARSEDENSEMDOT(sum,xs,v,idx,n);
951         d    = fshift + A->a[diag[i]+shift];
952         n    = B->i[i+1] - B->i[i];
953         idx  = B->j + B->i[i] + shift;
954         v    = B->a + B->i[i] + shift;
955         SPARSEDENSEMDOT(sum,ls,v,idx,n);
956         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
957       }
958     }
959     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
960     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
961     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
962   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
963     if (flag & SOR_ZERO_INITIAL_GUESS) {
964       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
965       PetscFunctionReturn(0);
966     }
967     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
968     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
969     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
970     if (xx != bb) {
971       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
972     } else {
973       b = x;
974     }
975     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
976     xs = x + shift; /* shift by one for index start of 1 */
977     ls = ls + shift;
978     while (its--) {
979       for ( i=m-1; i>-1; i-- ) {
980         n    = A->i[i+1] - A->i[i];
981         PLogFlops(4*n+3);
982         idx  = A->j + A->i[i] + shift;
983         v    = A->a + A->i[i] + shift;
984         sum  = b[i];
985         SPARSEDENSEMDOT(sum,xs,v,idx,n);
986         d    = fshift + A->a[diag[i]+shift];
987         n    = B->i[i+1] - B->i[i];
988         idx  = B->j + B->i[i] + shift;
989         v    = B->a + B->i[i] + shift;
990         SPARSEDENSEMDOT(sum,ls,v,idx,n);
991         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
992       }
993     }
994     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
995     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
996     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
997   } else {
998     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
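/*
  MatGetInfo sums the statistics of the two sequential blocks; for the
  MAT_GLOBAL_MAX and MAT_GLOBAL_SUM flavors the five local numbers are reduced
  over the communicator with MPI_MAX or MPI_SUM respectively.
*/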
1003 #undef __FUNC__
1004 #define __FUNC__ "MatGetInfo_MPIAIJ"
1005 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1006 {
1007   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1008   Mat        A = mat->A, B = mat->B;
1009   int        ierr;
1010   double     isend[5], irecv[5];
1011 
1012   PetscFunctionBegin;
1013   info->block_size     = 1.0;
1014   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1015   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1016   isend[3] = info->memory;  isend[4] = info->mallocs;
1017   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1018   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1019   isend[3] += info->memory;  isend[4] += info->mallocs;
1020   if (flag == MAT_LOCAL) {
1021     info->nz_used      = isend[0];
1022     info->nz_allocated = isend[1];
1023     info->nz_unneeded  = isend[2];
1024     info->memory       = isend[3];
1025     info->mallocs      = isend[4];
1026   } else if (flag == MAT_GLOBAL_MAX) {
1027     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1028     info->nz_used      = irecv[0];
1029     info->nz_allocated = irecv[1];
1030     info->nz_unneeded  = irecv[2];
1031     info->memory       = irecv[3];
1032     info->mallocs      = irecv[4];
1033   } else if (flag == MAT_GLOBAL_SUM) {
1034     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1035     info->nz_used      = irecv[0];
1036     info->nz_allocated = irecv[1];
1037     info->nz_unneeded  = irecv[2];
1038     info->memory       = irecv[3];
1039     info->mallocs      = irecv[4];
1040   }
1041   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1042   info->fill_ratio_needed = 0;
1043   info->factor_mallocs    = 0;
1044   info->rows_global       = (double)mat->M;
1045   info->columns_global    = (double)mat->N;
1046   info->rows_local        = (double)mat->m;
1047   info->columns_local     = (double)mat->n;
1048 
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNC__
1053 #define __FUNC__ "MatSetOption_MPIAIJ"
1054 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1055 {
1056   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1057   int        ierr;
1058 
1059   PetscFunctionBegin;
1060   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1061       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1062       op == MAT_COLUMNS_UNSORTED ||
1063       op == MAT_COLUMNS_SORTED ||
1064       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1065       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1066         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1067         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1068   } else if (op == MAT_ROW_ORIENTED) {
1069     a->roworiented = 1;
1070     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1071     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1072   } else if (op == MAT_ROWS_SORTED ||
1073              op == MAT_ROWS_UNSORTED ||
1074              op == MAT_SYMMETRIC ||
1075              op == MAT_STRUCTURALLY_SYMMETRIC ||
1076              op == MAT_YES_NEW_DIAGONALS)
1077     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1078   else if (op == MAT_COLUMN_ORIENTED) {
1079     a->roworiented = 0;
1080     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1081     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1082   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1083     a->donotstash = 1;
1084   } else if (op == MAT_NO_NEW_DIAGONALS){
1085     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1086   } else {
1087     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1088   }
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 #undef __FUNC__
1093 #define __FUNC__ "MatGetSize_MPIAIJ"
1094 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1095 {
1096   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1097 
1098   PetscFunctionBegin;
1099   if (m) *m = mat->M;
1100   if (n) *n = mat->N;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNC__
1105 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1106 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1107 {
1108   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1109 
1110   PetscFunctionBegin;
1111   if (m) *m = mat->m;
1112   if (n) *n = mat->n;
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNC__
1117 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1118 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1119 {
1120   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1121 
1122   PetscFunctionBegin;
1123   *m = mat->rstart; *n = mat->rend;
1124   PetscFunctionReturn(0);
1125 }
1126 
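/*
  MatGetRow for the parallel format: the requested row is fetched from both the
  diagonal block A and the off-diagonal block B and merged into a single work array
  in increasing global column order, with B's compressed column indices translated
  back to global numbering through garray.
*/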
1127 #undef __FUNC__
1128 #define __FUNC__ "MatGetRow_MPIAIJ"
1129 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1130 {
1131   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1132   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1133   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1134   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1135   int        *cmap, *idx_p;
1136 
1137   PetscFunctionBegin;
1138   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1139   mat->getrowactive = PETSC_TRUE;
1140 
1141   if (!mat->rowvalues && (idx || v)) {
1142     /*
1143         allocate enough space to hold information from the longest row.
1144     */
1145     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1146     int     max = 1,m = mat->m,tmp;
1147     for ( i=0; i<m; i++ ) {
1148       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1149       if (max < tmp) { max = tmp; }
1150     }
1151     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1152     mat->rowindices = (int *) (mat->rowvalues + max);
1153   }
1154 
1155   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1156   lrow = row - rstart;
1157 
1158   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1159   if (!v)   {pvA = 0; pvB = 0;}
1160   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1161   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1162   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1163   nztot = nzA + nzB;
1164 
1165   cmap  = mat->garray;
1166   if (v  || idx) {
1167     if (nztot) {
1168       /* Sort by increasing column numbers, assuming A and B already sorted */
1169       int imark = -1;
1170       if (v) {
1171         *v = v_p = mat->rowvalues;
1172         for ( i=0; i<nzB; i++ ) {
1173           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1174           else break;
1175         }
1176         imark = i;
1177         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1178         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1179       }
1180       if (idx) {
1181         *idx = idx_p = mat->rowindices;
1182         if (imark > -1) {
1183           for ( i=0; i<imark; i++ ) {
1184             idx_p[i] = cmap[cworkB[i]];
1185           }
1186         } else {
1187           for ( i=0; i<nzB; i++ ) {
1188             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1189             else break;
1190           }
1191           imark = i;
1192         }
1193         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1194         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1195       }
1196     } else {
1197       if (idx) *idx = 0;
1198       if (v)   *v   = 0;
1199     }
1200   }
1201   *nz = nztot;
1202   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1203   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNC__
1208 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1209 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1210 {
1211   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1212 
1213   PetscFunctionBegin;
1214   if (aij->getrowactive == PETSC_FALSE) {
1215     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1216   }
1217   aij->getrowactive = PETSC_FALSE;
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 #undef __FUNC__
1222 #define __FUNC__ "MatNorm_MPIAIJ"
1223 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1224 {
1225   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1226   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1227   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1228   double     sum = 0.0;
1229   Scalar     *v;
1230 
1231   PetscFunctionBegin;
1232   if (aij->size == 1) {
1233     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1234   } else {
1235     if (type == NORM_FROBENIUS) {
1236       v = amat->a;
1237       for (i=0; i<amat->nz; i++ ) {
1238 #if defined(PETSC_USE_COMPLEX)
1239         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1240 #else
1241         sum += (*v)*(*v); v++;
1242 #endif
1243       }
1244       v = bmat->a;
1245       for (i=0; i<bmat->nz; i++ ) {
1246 #if defined(PETSC_USE_COMPLEX)
1247         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1248 #else
1249         sum += (*v)*(*v); v++;
1250 #endif
1251       }
1252       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1253       *norm = sqrt(*norm);
1254     } else if (type == NORM_1) { /* max column norm */
1255       double *tmp, *tmp2;
1256       int    *jj, *garray = aij->garray;
1257       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1258       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1259       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1260       *norm = 0.0;
1261       v = amat->a; jj = amat->j;
1262       for ( j=0; j<amat->nz; j++ ) {
1263         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1264       }
1265       v = bmat->a; jj = bmat->j;
1266       for ( j=0; j<bmat->nz; j++ ) {
1267         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1268       }
1269       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1270       for ( j=0; j<aij->N; j++ ) {
1271         if (tmp2[j] > *norm) *norm = tmp2[j];
1272       }
1273       ierr = PetscFree(tmp);CHKERRQ(ierr);
1274       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1275     } else if (type == NORM_INFINITY) { /* max row norm */
1276       double ntemp = 0.0;
1277       for ( j=0; j<amat->m; j++ ) {
1278         v = amat->a + amat->i[j] + shift;
1279         sum = 0.0;
1280         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1281           sum += PetscAbsScalar(*v); v++;
1282         }
1283         v = bmat->a + bmat->i[j] + shift;
1284         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1285           sum += PetscAbsScalar(*v); v++;
1286         }
1287         if (sum > ntemp) ntemp = sum;
1288       }
1289       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1290     } else {
1291       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1292     }
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
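/*
  The transpose is built explicitly: a new parallel matrix with the row and column
  sizes swapped is created, and every locally stored entry (from both blocks, with
  column indices converted to global numbering) is re-inserted via MatSetValues()
  with its row and column indices exchanged; assembly then routes the entries to
  their owning processes.  In the "in-place" case the result is copied over the
  original matrix header.
*/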
1297 #undef __FUNC__
1298 #define __FUNC__ "MatTranspose_MPIAIJ"
1299 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1300 {
1301   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1302   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1303   int        ierr,shift = Aloc->indexshift;
1304   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1305   Mat        B;
1306   Scalar     *array;
1307 
1308   PetscFunctionBegin;
1309   if (matout == PETSC_NULL && M != N) {
1310     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1311   }
1312 
1313   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1314 
1315   /* copy over the A part */
1316   Aloc = (Mat_SeqAIJ*) a->A->data;
1317   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1318   row = a->rstart;
1319   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1320   for ( i=0; i<m; i++ ) {
1321     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1322     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1323   }
1324   aj = Aloc->j;
1325   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1326 
1327   /* copy over the B part */
1328   Aloc = (Mat_SeqAIJ*) a->B->data;
1329   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1330   row = a->rstart;
1331   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1332   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1333   for ( i=0; i<m; i++ ) {
1334     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1335     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1336   }
1337   ierr = PetscFree(ct);CHKERRQ(ierr);
1338   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1339   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1340   if (matout != PETSC_NULL) {
1341     *matout = B;
1342   } else {
1343     PetscOps       *Abops;
1344     struct _MatOps *Aops;
1345 
1346     /* This isn't really an in-place transpose, but we free the old data structures of a */
1347     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1348     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1349     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1350 #if defined (PETSC_USE_CTABLE)
1351     if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
1352 #else
1353     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1354 #endif
1355     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1356     if (a->lvec) VecDestroy(a->lvec);
1357     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1358     ierr = PetscFree(a);CHKERRQ(ierr);
1359 
1360     /*
1361        This is horrible, horrible code. We need to keep the
1362       A pointers for the bops and ops but copy everything
1363       else from B.
1364     */
1365     Abops   = A->bops;
1366     Aops    = A->ops;
1367     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1368     A->bops = Abops;
1369     A->ops  = Aops;
1370     PetscHeaderDestroy(B);
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNC__
1376 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1377 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1378 {
1379   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1380   Mat        a = aij->A, b = aij->B;
1381   int        ierr,s1,s2,s3;
1382 
1383   PetscFunctionBegin;
1384   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1385   if (rr) {
1386     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1387     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1388     /* Overlap communication with computation. */
1389     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1390   }
1391   if (ll) {
1392     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1393     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1394     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1395   }
1396   /* scale  the diagonal block */
1397   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1398 
1399   if (rr) {
1400     /* Do a scatter end and then right scale the off-diagonal block */
1401     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1402     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1403   }
1404 
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 
1409 #undef __FUNC__
1410 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1411 int MatPrintHelp_MPIAIJ(Mat A)
1412 {
1413   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1414   int        ierr;
1415 
1416   PetscFunctionBegin;
1417   if (!a->rank) {
1418     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1419   }
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNC__
1424 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1425 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1426 {
1427   PetscFunctionBegin;
1428   *bs = 1;
1429   PetscFunctionReturn(0);
1430 }
1431 #undef __FUNC__
1432 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1433 int MatSetUnfactored_MPIAIJ(Mat A)
1434 {
1435   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1436   int        ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNC__
1444 #define __FUNC__ "MatEqual_MPIAIJ"
1445 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1446 {
1447   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1448   Mat        a, b, c, d;
1449   PetscTruth flg;
1450   int        ierr;
1451 
1452   PetscFunctionBegin;
1453   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1454   a = matA->A; b = matA->B;
1455   c = matB->A; d = matB->B;
1456 
1457   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1458   if (flg == PETSC_TRUE) {
1459     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1460   }
1461   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 #undef __FUNC__
1466 #define __FUNC__ "MatCopy_MPIAIJ"
1467 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1468 {
1469   int        ierr;
1470   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1471   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1472 
1473   PetscFunctionBegin;
1474   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1475     /* Because of the column compression in the off-processor part of the matrix a->B,
1476        the number of columns in a->B and b->B may be different, hence we cannot call
1477        MatCopy() directly on the two parts. If need be, we could provide a copy more
1478        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1479        then copying the submatrices */
1480     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1481   } else {
1482     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1483     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1489 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1490 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1491 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1492 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1493 
1494 /* -------------------------------------------------------------------*/
1495 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1496        MatGetRow_MPIAIJ,
1497        MatRestoreRow_MPIAIJ,
1498        MatMult_MPIAIJ,
1499        MatMultAdd_MPIAIJ,
1500        MatMultTrans_MPIAIJ,
1501        MatMultTransAdd_MPIAIJ,
1502        0,
1503        0,
1504        0,
1505        0,
1506        0,
1507        0,
1508        MatRelax_MPIAIJ,
1509        MatTranspose_MPIAIJ,
1510        MatGetInfo_MPIAIJ,
1511        MatEqual_MPIAIJ,
1512        MatGetDiagonal_MPIAIJ,
1513        MatDiagonalScale_MPIAIJ,
1514        MatNorm_MPIAIJ,
1515        MatAssemblyBegin_MPIAIJ,
1516        MatAssemblyEnd_MPIAIJ,
1517        0,
1518        MatSetOption_MPIAIJ,
1519        MatZeroEntries_MPIAIJ,
1520        MatZeroRows_MPIAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525        MatGetSize_MPIAIJ,
1526        MatGetLocalSize_MPIAIJ,
1527        MatGetOwnershipRange_MPIAIJ,
1528        0,
1529        0,
1530        0,
1531        0,
1532        MatDuplicate_MPIAIJ,
1533        0,
1534        0,
1535        0,
1536        0,
1537        0,
1538        MatGetSubMatrices_MPIAIJ,
1539        MatIncreaseOverlap_MPIAIJ,
1540        MatGetValues_MPIAIJ,
1541        MatCopy_MPIAIJ,
1542        MatPrintHelp_MPIAIJ,
1543        MatScale_MPIAIJ,
1544        0,
1545        0,
1546        0,
1547        MatGetBlockSize_MPIAIJ,
1548        0,
1549        0,
1550        0,
1551        0,
1552        MatFDColoringCreate_MPIAIJ,
1553        0,
1554        MatSetUnfactored_MPIAIJ,
1555        0,
1556        0,
1557        MatGetSubMatrix_MPIAIJ,
1558        0,
1559        0,
1560        MatGetMaps_Petsc};
1561 
1562 /* ----------------------------------------------------------------------------------------*/
1563 
1564 EXTERN_C_BEGIN
1565 #undef __FUNC__
1566 #define __FUNC__ "MatStoreValues_MPIAIJ"
1567 int MatStoreValues_MPIAIJ(Mat mat)
1568 {
1569   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1570   int        ierr;
1571 
1572   PetscFunctionBegin;
1573   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1574   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 EXTERN_C_END
1578 
1579 EXTERN_C_BEGIN
1580 #undef __FUNC__
1581 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1582 int MatRetrieveValues_MPIAIJ(Mat mat)
1583 {
1584   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1585   int        ierr;
1586 
1587   PetscFunctionBegin;
1588   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1589   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1590   PetscFunctionReturn(0);
1591 }
1592 EXTERN_C_END
1593 
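/*
   Usage sketch (illustrative only; assumes the public MatStoreValues()/MatRetrieveValues()
   entry points dispatch to the routines above for MPIAIJ matrices). The typical pattern is
   to freeze the nonzero structure, save the numerical values, and restore them later:

      ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
      ...  overwrite the values of mat, e.g. by an in-place factorization ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/
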
1594 #include "pc.h"
1595 EXTERN_C_BEGIN
1596 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1597 EXTERN_C_END
1598 
1599 #undef __FUNC__
1600 #define __FUNC__ "MatCreateMPIAIJ"
1601 /*@C
1602    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1603    (the default parallel PETSc format).  For good matrix assembly performance
1604    the user should preallocate the matrix storage by setting the parameters
1605    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1606    performance can be increased by more than a factor of 50.
1607 
1608    Collective on MPI_Comm
1609 
1610    Input Parameters:
1611 +  comm - MPI communicator
1612 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1613            This value should be the same as the local size used in creating the
1614            y vector for the matrix-vector product y = Ax.
1615 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1616        This value should be the same as the local size used in creating the
1617        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1618 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1619 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1620 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1621            (same value is used for all local rows)
1622 .  d_nnz - array containing the number of nonzeros in the various rows of the
1623            DIAGONAL portion of the local submatrix (possibly different for each row)
1624            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1625            The size of this array is equal to the number of local rows, i.e 'm'.
1626            You must leave room for the diagonal entry even if it is zero.
1627 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1628            submatrix (same value is used for all local rows).
1629 -  o_nnz - array containing the number of nonzeros in the various rows of the
1630            OFF-DIAGONAL portion of the local submatrix (possibly different for
1631            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1632            structure. The size of this array is equal to the number
1633            of local rows, i.e. 'm'.
1634 
1635    Output Parameter:
1636 .  A - the matrix
1637 
1638    Notes:
1639    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1640    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1641    storage requirements for this matrix.
1642 
1643    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1644    processor then it must be used on all processors that share the object for
1645    that argument.
1646 
1647    The AIJ format (also called the Yale sparse matrix format or
1648    compressed row storage), is fully compatible with standard Fortran 77
1649    storage.  That is, the stored row and column indices can begin at
1650    either one (as in Fortran) or zero.  See the users manual for details.
1651 
1652    The user MUST specify either the local or global matrix dimensions
1653    (possibly both).
1654 
1655    The parallel matrix is partitioned such that the first m0 rows belong to
1656    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1657    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
1658 
1659    The DIAGONAL portion of the local submatrix of a processor can be defined
1660    as the submatrix which is obtained by extracting the part corresponding
1661    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1662    first row that belongs to the processor, and r2 is the last row belonging
1663    to this processor. This is a square mxm matrix. The remaining portion
1664    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1665 
1666    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1667 
1668    By default, this format uses inodes (identical nodes) when possible.
1669    We search for consecutive rows with the same nonzero structure, thereby
1670    reusing matrix information to achieve increased efficiency.
1671 
1672    Options Database Keys:
1673 +  -mat_aij_no_inode  - Do not use inodes
1674 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1675 .  -mat_aij_oneindex - Internally use indexing starting at 1
1676         rather than 0.  Note that when calling MatSetValues(),
1677         the user still MUST index entries starting at 0!
1678 -  -mat_mpi - use the parallel matrix data structures even on one processor
1679                (defaults to using SeqAIJ format on one processor)
1682 
1683 
1684    Example usage:
1685 
1686    Consider the following 8x8 matrix with 34 non-zero values, that is
1687    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1688    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1689    as follows:
1690 
1691 .vb
1692             1  2  0  |  0  3  0  |  0  4
1693     Proc0   0  5  6  |  7  0  0  |  8  0
1694             9  0 10  | 11  0  0  | 12  0
1695     -------------------------------------
1696            13  0 14  | 15 16 17  |  0  0
1697     Proc1   0 18  0  | 19 20 21  |  0  0
1698             0  0  0  | 22 23  0  | 24  0
1699     -------------------------------------
1700     Proc2  25 26 27  |  0  0 28  | 29  0
1701            30  0  0  | 31 32 33  |  0 34
1702 .ve
1703 
1704    This can be represented as a collection of submatrices as:
1705 
1706 .vb
1707       A B C
1708       D E F
1709       G H I
1710 .ve
1711 
1712    where the submatrices A,B,C are owned by proc0, D,E,F are
1713    owned by proc1, and G,H,I are owned by proc2.
1714 
1715    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1716    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1717    The 'M','N' parameters are 8,8, and have the same values on all procs.
1718 
1719    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1720    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1721    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1722    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1723    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1724    matrix, and [DF] as another SeqAIJ matrix.
1725 
1726    When d_nz, o_nz parameters are specified, d_nz storage elements are
1727    allocated for every row of the local diagonal submatrix, and o_nz
1728    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1729    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
1730    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1731    In this case, the values of d_nz,o_nz are:
1732 .vb
1733      proc0 : d_nz = 2, o_nz = 2
1734      proc1 : d_nz = 3, o_nz = 2
1735      proc2 : d_nz = 1, o_nz = 4
1736 .ve
1737    We are allocating m*(d_nz+o_nz) storage locations on every processor. This
1738    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
1739    for proc2, i.e. we are using 12+15+10=37 storage locations to store
1740    34 values.
1741 
1742    When d_nnz, o_nnz parameters are specified, the storage is specified
1743    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1744    In the above case the values for d_nnz,o_nnz are:
1745 .vb
1746      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1747      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1748      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1749 .ve
1750    Here the space allocated is the sum of all the above values, i.e. 34, and
1751    hence the preallocation is perfect.
1752 
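   As an illustrative sketch only (not drawn from an actual PETSc example), the matching
   creation call on proc1 for the perfectly preallocated case above might look like

.vb
     int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
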
1753    Level: intermediate
1754 
1755 .keywords: matrix, aij, compressed row, sparse, parallel
1756 
1757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1758 @*/
1759 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1760 {
1761   Mat          B;
1762   Mat_MPIAIJ   *b;
1763   int          ierr, i,size;
1764   PetscTruth   flag1, flag2;
1765 
1766   PetscFunctionBegin;
1767 
1768   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1769   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1770   if (d_nnz) {
1771     for (i=0; i<m; i++) {
1772       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1773     }
1774   }
1775   if (o_nnz) {
1776     for (i=0; i<m; i++) {
1777       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1778     }
1779   }
1780 
1781   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1782   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1783   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1784   if (!flag1 && !flag2 && size == 1) {
1785     if (M == PETSC_DECIDE) M = m;
1786     if (N == PETSC_DECIDE) N = n;
1787     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1788     PetscFunctionReturn(0);
1789   }
1790 
1791   *A = 0;
1792   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1793   PLogObjectCreate(B);
1794   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1795   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1796   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1797   B->ops->destroy = MatDestroy_MPIAIJ;
1798   B->ops->view    = MatView_MPIAIJ;
1799   B->factor       = 0;
1800   B->assembled    = PETSC_FALSE;
1801   B->mapping      = 0;
1802 
1803   B->insertmode      = NOT_SET_VALUES;
1804   b->size            = size;
1805   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1806 
1807   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1808     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1809   }
1810 
1811   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1812   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1813   b->m = m; B->m = m;
1814   b->n = n; B->n = n;
1815   b->N = N; B->N = N;
1816   b->M = M; B->M = M;
1817 
1818   /* the information in the maps duplicates the information computed below, eventually
1819      we should remove the duplicate information that is not contained in the maps */
1820   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1821   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1822 
1823   /* build local table of row and column ownerships */
1824   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1825   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1826   b->cowners = b->rowners + b->size + 2;
1827   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1828   b->rowners[0] = 0;
1829   for ( i=2; i<=b->size; i++ ) {
1830     b->rowners[i] += b->rowners[i-1];
1831   }
1832   b->rstart = b->rowners[b->rank];
1833   b->rend   = b->rowners[b->rank+1];
1834   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1835   b->cowners[0] = 0;
1836   for ( i=2; i<=b->size; i++ ) {
1837     b->cowners[i] += b->cowners[i-1];
1838   }
1839   b->cstart = b->cowners[b->rank];
1840   b->cend   = b->cowners[b->rank+1];
1841 
1842   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1843   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1844   PLogObjectParent(B,b->A);
1845   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1846   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1847   PLogObjectParent(B,b->B);
1848 
1849   /* build cache (stash) for matrix entries destined for other processors */
1850   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1851   b->donotstash  = 0;
1852   b->colmap      = 0;
1853   b->garray      = 0;
1854   b->roworiented = 1;
1855 
1856   /* stuff used for matrix vector multiply */
1857   b->lvec      = 0;
1858   b->Mvctx     = 0;
1859 
1860   /* stuff for MatGetRow() */
1861   b->rowindices   = 0;
1862   b->rowvalues    = 0;
1863   b->getrowactive = PETSC_FALSE;
1864 
1865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1866                                      "MatStoreValues_MPIAIJ",
1867                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1868   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1869                                      "MatRetrieveValues_MPIAIJ",
1870                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1871   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1872                                      "MatGetDiagonalBlock_MPIAIJ",
1873                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1874   *A = B;
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNC__
1879 #define __FUNC__ "MatDuplicate_MPIAIJ"
1880 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1881 {
1882   Mat        mat;
1883   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1884   int        ierr, len = 0;
1885   PetscTruth flg;
1886 
1887   PetscFunctionBegin;
1888   *newmat       = 0;
1889   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1890   PLogObjectCreate(mat);
1891   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1892   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1893   mat->ops->destroy = MatDestroy_MPIAIJ;
1894   mat->ops->view    = MatView_MPIAIJ;
1895   mat->factor       = matin->factor;
1896   mat->assembled    = PETSC_TRUE;
1897   mat->insertmode   = NOT_SET_VALUES;
1898 
1899   a->m = mat->m   = oldmat->m;
1900   a->n = mat->n   = oldmat->n;
1901   a->M = mat->M   = oldmat->M;
1902   a->N = mat->N   = oldmat->N;
1903 
1904   a->rstart       = oldmat->rstart;
1905   a->rend         = oldmat->rend;
1906   a->cstart       = oldmat->cstart;
1907   a->cend         = oldmat->cend;
1908   a->size         = oldmat->size;
1909   a->rank         = oldmat->rank;
1910   a->donotstash   = oldmat->donotstash;
1911   a->roworiented  = oldmat->roworiented;
1912   a->rowindices   = 0;
1913   a->rowvalues    = 0;
1914   a->getrowactive = PETSC_FALSE;
1915 
1916   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1917   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1918   a->cowners = a->rowners + a->size + 2;
1919   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1920   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1921   if (oldmat->colmap) {
1922 #if defined (PETSC_USE_CTABLE)
1923     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1924 #else
1925     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1926     PLogObjectMemory(mat,(a->N)*sizeof(int));
1927     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1928 #endif
1929   } else a->colmap = 0;
1930   if (oldmat->garray) {
1931     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1932     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1933     PLogObjectMemory(mat,len*sizeof(int));
1934     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1935   } else a->garray = 0;
1936 
1937   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1938   PLogObjectParent(mat,a->lvec);
1939   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1940   PLogObjectParent(mat,a->Mvctx);
1941   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1942   PLogObjectParent(mat,a->A);
1943   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1944   PLogObjectParent(mat,a->B);
1945   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1946   if (flg) {
1947     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1948   }
1949   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1950   *newmat = mat;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #include "sys.h"
1955 
1956 #undef __FUNC__
1957 #define __FUNC__ "MatLoad_MPIAIJ"
1958 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1959 {
1960   Mat          A;
1961   Scalar       *vals,*svals;
1962   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1963   MPI_Status   status;
1964   int          i, nz, ierr, j,rstart, rend, fd;
1965   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1966   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1967   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1968 
1969   PetscFunctionBegin;
1970   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1971   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1972   if (!rank) {
1973     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1974     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1975     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1976     if (header[3] < 0) {
1977       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1978     }
1979   }
1980 
1981   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1982   M = header[1]; N = header[2];
1983   /* determine ownership of all rows */
1984   m = M/size + ((M % size) > rank);
1985   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1986   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1987   rowners[0] = 0;
1988   for ( i=2; i<=size; i++ ) {
1989     rowners[i] += rowners[i-1];
1990   }
1991   rstart = rowners[rank];
1992   rend   = rowners[rank+1];
1993 
1994   /* distribute row lengths to all processors */
1995   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1996   offlens = ourlens + (rend-rstart);
1997   if (!rank) {
1998     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1999     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2000     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2001     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2002     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2003     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2004   } else {
2005     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2006   }
2007 
2008   if (!rank) {
2009     /* calculate the number of nonzeros on each processor */
2010     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2011     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2012     for ( i=0; i<size; i++ ) {
2013       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2014         procsnz[i] += rowlengths[j];
2015       }
2016     }
2017     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2018 
2019     /* determine max buffer needed and allocate it */
2020     maxnz = 0;
2021     for ( i=0; i<size; i++ ) {
2022       maxnz = PetscMax(maxnz,procsnz[i]);
2023     }
2024     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2025 
2026     /* read in my part of the matrix column indices  */
2027     nz     = procsnz[0];
2028     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2029     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2030 
2031     /* read in everyone else's part and ship it off */
2032     for ( i=1; i<size; i++ ) {
2033       nz   = procsnz[i];
2034       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2035       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2036     }
2037     ierr = PetscFree(cols);CHKERRQ(ierr);
2038   } else {
2039     /* determine buffer space needed for message */
2040     nz = 0;
2041     for ( i=0; i<m; i++ ) {
2042       nz += ourlens[i];
2043     }
2044     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2045 
2046     /* receive message of column indices */
2047     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2048     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2049     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2050   }
2051 
2052   /* determine column ownership if matrix is not square */
2053   if (N != M) {
2054     n      = N/size + ((N % size) > rank);
2055     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2056     cstart = cend - n;
2057   } else {
2058     cstart = rstart;
2059     cend   = rend;
2060     n      = cend - cstart;
2061   }
2062 
2063   /* loop over local rows, determining number of off diagonal entries */
2064   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2065   jj = 0;
2066   for ( i=0; i<m; i++ ) {
2067     for ( j=0; j<ourlens[i]; j++ ) {
2068       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2069       jj++;
2070     }
2071   }
2072 
2073   /* create our matrix */
2074   for ( i=0; i<m; i++ ) {
2075     ourlens[i] -= offlens[i];
2076   }
2077   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2078   A = *newmat;
2079   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2080   for ( i=0; i<m; i++ ) {
2081     ourlens[i] += offlens[i];
2082   }
2083 
2084   if (!rank) {
2085     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2086 
2087     /* read in my part of the matrix numerical values  */
2088     nz   = procsnz[0];
2089     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2090 
2091     /* insert into matrix */
2092     jj      = rstart;
2093     smycols = mycols;
2094     svals   = vals;
2095     for ( i=0; i<m; i++ ) {
2096       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2097       smycols += ourlens[i];
2098       svals   += ourlens[i];
2099       jj++;
2100     }
2101 
2102     /* read in other processors and ship out */
2103     for ( i=1; i<size; i++ ) {
2104       nz   = procsnz[i];
2105       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2106       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2107     }
2108     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2109   } else {
2110     /* receive numeric values */
2111     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2112 
2113     /* receive message of values */
2114     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2115     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2116     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2117 
2118     /* insert into matrix */
2119     jj      = rstart;
2120     smycols = mycols;
2121     svals   = vals;
2122     for ( i=0; i<m; i++ ) {
2123       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2124       smycols += ourlens[i];
2125       svals   += ourlens[i];
2126       jj++;
2127     }
2128   }
2129   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2130   ierr = PetscFree(vals);CHKERRQ(ierr);
2131   ierr = PetscFree(mycols);CHKERRQ(ierr);
2132   ierr = PetscFree(rowners);CHKERRQ(ierr);
2133 
2134   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #undef __FUNC__
2140 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2141 /*
2142     Not great since it makes two copies of the submatrix: first a sequential (SeqAIJ)
2143   copy on each processor, and then the final result formed by concatenating the
2144   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2145 */
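/*
   Usage sketch (illustrative only; assumes the public MatGetSubMatrix() entry point
   dispatches here for MPIAIJ matrices): a caller typically extracts the submatrix once
   with MAT_INITIAL_MATRIX and later refreshes its values with MAT_REUSE_MATRIX, e.g.

      Mat sub;
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ...  change the numerical values of mat (same nonzero pattern) ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/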
2146 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2147 {
2148   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2149   Mat        *local,M, Mreuse;
2150   Scalar     *vwork,*aa;
2151   MPI_Comm   comm = mat->comm;
2152   Mat_SeqAIJ *aij;
2153   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2154 
2155   PetscFunctionBegin;
2156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2157   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2158 
2159   if (call ==  MAT_REUSE_MATRIX) {
2160     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2161     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2162     local = &Mreuse;
2163     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2164   } else {
2165     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2166     Mreuse = *local;
2167     ierr = PetscFree(local);CHKERRQ(ierr);
2168   }
2169 
2170   /*
2171       m - number of local rows
2172       n - number of columns (same on all processors)
2173       rstart - first row in new global matrix generated
2174   */
2175   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2176   if (call == MAT_INITIAL_MATRIX) {
2177     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2178     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2179     ii  = aij->i;
2180     jj  = aij->j;
2181 
2182     /*
2183         Determine the number of non-zeros in the diagonal and off-diagonal
2184         portions of the matrix in order to do correct preallocation
2185     */
2186 
2187     /* first get start and end of "diagonal" columns */
2188     if (csize == PETSC_DECIDE) {
2189       nlocal = n/size + ((n % size) > rank);
2190     } else {
2191       nlocal = csize;
2192     }
2193     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2194     rstart = rend - nlocal;
2195     if (rank == size - 1 && rend != n) {
2196       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2197     }
2198 
2199     /* next, compute all the lengths */
2200     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2201     olens = dlens + m;
2202     for ( i=0; i<m; i++ ) {
2203       jend = ii[i+1] - ii[i];
2204       olen = 0;
2205       dlen = 0;
2206       for ( j=0; j<jend; j++ ) {
2207         if ( *jj < rstart || *jj >= rend) olen++;
2208         else dlen++;
2209         jj++;
2210       }
2211       olens[i] = olen;
2212       dlens[i] = dlen;
2213     }
2214     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2215     ierr = PetscFree(dlens);CHKERRQ(ierr);
2216   } else {
2217     int ml,nl;
2218 
2219     M = *newmat;
2220     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2221     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2222     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2223     /*
2224          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2225        rather than the slower MatSetValues().
2226     */
2227     M->was_assembled = PETSC_TRUE;
2228     M->assembled     = PETSC_FALSE;
2229   }
2230   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2231   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2232   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2233   ii  = aij->i;
2234   jj  = aij->j;
2235   aa  = aij->a;
2236   for (i=0; i<m; i++) {
2237     row   = rstart + i;
2238     nz    = ii[i+1] - ii[i];
2239     cwork = jj;     jj += nz;
2240     vwork = aa;     aa += nz;
2241     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2242   }
2243 
2244   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2245   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246   *newmat = M;
2247 
2248   /* save submatrix used in processor for next request */
2249   if (call ==  MAT_INITIAL_MATRIX) {
2250     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2251     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2252   }
2253 
2254   PetscFunctionReturn(0);
2255 }
2256