xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b1ae27ea635b58bc3a5a567f99a72fa63199efe4)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.289 1999/03/18 00:35:56 balay Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  This is done in a non-scalable way since the
19 length of colmap equals the global number of columns.
20 */
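/*
   For example (an illustrative sketch, not taken from this file): if the
   off-diagonal block B has three local columns whose global column numbers are
   garray[] = {3,7,12}, then after this routine colmap[7] == 2, i.e. global
   column 7 is local column 1 (entries are stored shifted by one so that 0 can
   mean "not present").  With USE_CTABLE the same mapping is kept in a hash
   table instead of a dense array of length N.
*/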
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i;
28 #if defined (USE_CTABLE)
29   int        ierr;
30 #endif
31 
32   PetscFunctionBegin;
33 #if defined (USE_CTABLE)
34   ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr);
35   for ( i=0; i<n; i++ ){
36     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
37   }
38 #else
39   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
40   PLogObjectMemory(mat,aij->N*sizeof(int));
41   PetscMemzero(aij->colmap,aij->N*sizeof(int));
42   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
43 #endif
44   PetscFunctionReturn(0);
45 }
46 
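/*
   The two macros below insert a single (row,col,value) entry into the diagonal
   block (A) or the off-diagonal block (B) of the local storage.  Each scans the
   row for the column; if the entry is new and the row is already full, the row
   is enlarged by CHUNKSIZE slots, which requires reallocating and copying the
   entire local i, j and a arrays.
*/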
47 #define CHUNKSIZE   15
48 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
49 { \
50  \
51     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
52     rmax = aimax[row]; nrow = ailen[row];  \
53     col1 = col - shift; \
54      \
55     low = 0; high = nrow; \
56     while (high-low > 5) { \
57       t = (low+high)/2; \
58       if (rp[t] > col) high = t; \
59       else             low  = t; \
60     } \
61       for ( _i=0; _i<nrow; _i++ ) { \
62         if (rp[_i] > col1) break; \
63         if (rp[_i] == col1) { \
64           if (addv == ADD_VALUES) ap[_i] += value;   \
65           else                  ap[_i] = value; \
66           goto a_noinsert; \
67         } \
68       }  \
69       if (nonew == 1) goto a_noinsert; \
70       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
71       if (nrow >= rmax) { \
72         /* there is no extra room in row, therefore enlarge */ \
73         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
74         Scalar *new_a; \
75  \
76         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
77  \
78         /* malloc new storage space */ \
79         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
80         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
81         new_j   = (int *) (new_a + new_nz); \
82         new_i   = new_j + new_nz; \
83  \
84         /* copy over old data into new slots */ \
85         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
86         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
87         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
88         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
89         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
90                                                            len*sizeof(int)); \
91         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
92         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
93                                                            len*sizeof(Scalar));  \
94         /* free up old matrix storage */ \
95  \
96         PetscFree(a->a);  \
97         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int)); \
162         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
163         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));  \
165         /* free up old matrix storage */ \
166  \
167         PetscFree(b->a);  \
168         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
169         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
170         b->singlemalloc = 1; \
171  \
172         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
173         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
174         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
175         b->maxnz += CHUNKSIZE; \
176         b->reallocs++; \
177       } \
178       N = nrow++ - 1; b->nz++; \
179       /* shift up all the later entries in this row */ \
180       for ( ii=N; ii>=_i; ii-- ) { \
181         rp[ii+1] = rp[ii]; \
182         ap[ii+1] = ap[ii]; \
183       } \
184       rp[_i] = col1;  \
185       ap[_i] = value;  \
186       b_noinsert: ; \
187       bilen[row] = nrow; \
188 }
189 
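/*
   MatSetValues for the parallel AIJ format.  Rows owned by this processor are
   inserted directly: columns in [cstart,cend) go into the diagonal block aij->A,
   all other columns go into the off-diagonal block aij->B (translated through
   colmap once the matrix has been assembled).  Rows owned by other processors
   are placed in the stash and communicated during assembly, unless
   MAT_IGNORE_OFF_PROC_ENTRIES has been set.

   Typical calling sequence from application code (a minimal sketch, not taken
   from this file):

      ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/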
190 #undef __FUNC__
191 #define __FUNC__ "MatSetValues_MPIAIJ"
192 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
193 {
194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
195   Scalar     value;
196   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
197   int        cstart = aij->cstart, cend = aij->cend,row,col;
198   int        roworiented = aij->roworiented;
199 
200   /* Some Variables required in the macro */
201   Mat        A = aij->A;
202   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
203   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
204   Scalar     *aa = a->a;
205 
206   Mat        B = aij->B;
207   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
208   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
209   Scalar     *ba = b->a;
210 
211   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
212   int        nonew = a->nonew,shift = a->indexshift;
213   Scalar     *ap;
214 
215   PetscFunctionBegin;
216   for ( i=0; i<m; i++ ) {
217     if (im[i] < 0) continue;
218 #if defined(USE_PETSC_BOPT_g)
219     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220 #endif
221     if (im[i] >= rstart && im[i] < rend) {
222       row = im[i] - rstart;
223       for ( j=0; j<n; j++ ) {
224         if (in[j] >= cstart && in[j] < cend){
225           col = in[j] - cstart;
226           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         }
230         else if (in[j] < 0) continue;
231 #if defined(USE_PETSC_BOPT_g)
232         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!aij->colmap) {
237               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined (USE_CTABLE)
240             ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr);
241 	    col--;
242 #else
243             col = aij->colmap[in[j]] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
246               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
247               col =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
249               B = aij->B;
250               b = (Mat_SeqAIJ *) B->data;
251               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
252               ba = b->a;
253             }
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
256           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
257           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
258         }
259       }
260     } else {
261       if (!aij->donotstash) {
262         if (roworiented) {
263           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
264         } else {
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNC__
274 #define __FUNC__ "MatGetValues_MPIAIJ"
275 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
276 {
277   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
278   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
279   int        cstart = aij->cstart, cend = aij->cend,row,col;
280 
281   PetscFunctionBegin;
282   for ( i=0; i<m; i++ ) {
283     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
284     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
285     if (idxm[i] >= rstart && idxm[i] < rend) {
286       row = idxm[i] - rstart;
287       for ( j=0; j<n; j++ ) {
288         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
289         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
290         if (idxn[j] >= cstart && idxn[j] < cend){
291           col = idxn[j] - cstart;
292           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
293         } else {
294           if (!aij->colmap) {
295             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
296           }
297 #if defined (USE_CTABLE)
298           ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr);
299           col --;
300 #else
301           col = aij->colmap[idxn[j]] - 1;
302 #endif
303           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
304           else {
305             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
306           }
307         }
308       }
309     } else {
310       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
311     }
312   }
313   PetscFunctionReturn(0);
314 }
315 
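/*
   Assembly is a two-phase process: MatAssemblyBegin() checks that all processors
   use the same insert mode and starts scattering the stashed off-processor
   entries; MatAssemblyEnd() receives them, inserts them with
   MatSetValues_MPIAIJ(), assembles the two sequential blocks and, on the first
   final assembly, builds the scatter context for the off-diagonal part via
   MatSetUpMultiply_MPIAIJ().
*/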
316 #undef __FUNC__
317 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
318 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
319 {
320   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
321   int         ierr,nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners); CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
338   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNC__
344 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
345 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
348   int         i,j,rstart,ncols,n,ierr,flg;
349   int         *row,*col,other_disassembled;
350   Scalar      *val;
351   InsertMode  addv = mat->insertmode;
352 
353   PetscFunctionBegin;
354   if (!aij->donotstash) {
355     while (1) {
356       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
357       if (!flg) break;
358 
359       for ( i=0; i<n; ) {
360         /* Now identify the consecutive vals belonging to the same row */
361         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
362         if (j < n) ncols = j-i;
363         else       ncols = n-i;
364         /* Now assemble all these values with a single function call */
365         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
366         i = j;
367       }
368     }
369     ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
370   }
371 
372   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
373   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
374 
375   /* determine if any processor has disassembled; if so, we must also
376      disassemble ourselves so that we may reassemble. */
377   /*
378      if the nonzero structure of submatrix B cannot change then we know that
379      no processor disassembled, so we can skip this step
380   */
381   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
382     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
383     if (mat->was_assembled && !other_disassembled) {
384       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
385     }
386   }
387 
388   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
389     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
390   }
391   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
392   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
393 
394   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
395   PetscFunctionReturn(0);
396 }
397 
398 #undef __FUNC__
399 #define __FUNC__ "MatZeroEntries_MPIAIJ"
400 int MatZeroEntries_MPIAIJ(Mat A)
401 {
402   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
403   int        ierr;
404 
405   PetscFunctionBegin;
406   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
407   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 
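/*
   MatZeroRows for the parallel case: each processor determines the owner of
   every row index it supplies, the indices are exchanged with nonblocking
   sends/receives, and each processor then zeros its own rows locally.  The
   off-diagonal block l->B is zeroed before l->A because setting the diagonal
   may insert values into l->B.
*/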
411 #undef __FUNC__
412 #define __FUNC__ "MatZeroRows_MPIAIJ"
413 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
414 {
415   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
416   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
417   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
418   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
419   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
420   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
421   MPI_Comm       comm = A->comm;
422   MPI_Request    *send_waits,*recv_waits;
423   MPI_Status     recv_status,*send_status;
424   IS             istmp;
425 
426   PetscFunctionBegin;
427   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
428   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
429 
430   /*  first count number of contributors to each processor */
431   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
432   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
433   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
434   for ( i=0; i<N; i++ ) {
435     idx = rows[i];
436     found = 0;
437     for ( j=0; j<size; j++ ) {
438       if (idx >= owners[j] && idx < owners[j+1]) {
439         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
440       }
441     }
442     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
443   }
444   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
445 
446   /* inform other processors of number of messages and max length*/
447   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
448   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
449   nrecvs = work[rank];
450   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
451   nmax = work[rank];
452   PetscFree(work);
453 
454   /* post receives:   */
455   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
456   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
457   for ( i=0; i<nrecvs; i++ ) {
458     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
459   }
460 
461   /* do sends:
462       1) starts[i] gives the starting index in svalues for stuff going to
463          the ith processor
464   */
465   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
466   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
467   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
468   starts[0] = 0;
469   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
470   for ( i=0; i<N; i++ ) {
471     svalues[starts[owner[i]]++] = rows[i];
472   }
473   ISRestoreIndices(is,&rows);
474 
475   starts[0] = 0;
476   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
477   count = 0;
478   for ( i=0; i<size; i++ ) {
479     if (procs[i]) {
480       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
481     }
482   }
483   PetscFree(starts);
484 
485   base = owners[rank];
486 
487   /*  wait on receives */
488   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
489   source = lens + nrecvs;
490   count  = nrecvs; slen = 0;
491   while (count) {
492     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
493     /* unpack receives into our local space */
494     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
495     source[imdex]  = recv_status.MPI_SOURCE;
496     lens[imdex]  = n;
497     slen += n;
498     count--;
499   }
500   PetscFree(recv_waits);
501 
502   /* move the data into the send scatter */
503   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
504   count = 0;
505   for ( i=0; i<nrecvs; i++ ) {
506     values = rvalues + i*nmax;
507     for ( j=0; j<lens[i]; j++ ) {
508       lrows[count++] = values[j] - base;
509     }
510   }
511   PetscFree(rvalues); PetscFree(lens);
512   PetscFree(owner); PetscFree(nprocs);
513 
514   /* actually zap the local rows */
515   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
516   PLogObjectParent(A,istmp);
517 
518   /*
519         Zero the required rows. If the "diagonal block" of the matrix
520      is square and the user wishes to set the diagonal we use separate
521      code so that MatSetValues() is not called for each diagonal entry,
522      which would allocate new memory and cause many mallocs, slowing things down.
523 
524        Contributed by: Mathew Knepley
525   */
526   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
527   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
528   if (diag && (l->A->M == l->A->N)) {
529     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
530   } else if (diag) {
531     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
532     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
533       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
534 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
535     }
536     for ( i = 0; i < slen; i++ ) {
537       row  = lrows[i] + rstart;
538       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
539     }
540     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
541     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
542   } else {
543     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
544   }
545   ierr = ISDestroy(istmp); CHKERRQ(ierr);
546   PetscFree(lrows);
547 
548   /* wait on sends */
549   if (nsends) {
550     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
551     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
552     PetscFree(send_status);
553   }
554   PetscFree(send_waits); PetscFree(svalues);
555 
556   PetscFunctionReturn(0);
557 }
558 
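/*
   Parallel matrix-vector product: the scatter of the needed off-processor
   entries of xx into a->lvec is overlapped with the multiply by the diagonal
   block a->A; the off-diagonal block a->B then adds in the contribution from
   the gathered ghost values.
*/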
559 #undef __FUNC__
560 #define __FUNC__ "MatMult_MPIAIJ"
561 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
562 {
563   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
564   int        ierr,nt;
565 
566   PetscFunctionBegin;
567   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
568   if (nt != a->n) {
569     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
570   }
571   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
572   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
573   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
574   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNC__
579 #define __FUNC__ "MatMultAdd_MPIAIJ"
580 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
581 {
582   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
583   int        ierr;
584 
585   PetscFunctionBegin;
586   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
588   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 
593 #undef __FUNC__
594 #define __FUNC__ "MatMultTrans_MPIAIJ"
595 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
596 {
597   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
598   int        ierr;
599 
600   PetscFunctionBegin;
601   /* do nondiagonal part */
602   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
603   /* send it on its way */
604   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
605   /* do local part */
606   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
607   /* receive remote parts: note this assumes the values are not actually */
608   /* inserted in yy until the VecScatterEnd() below, which is true for this    */
609   /* implementation but is perhaps not always true. */
610   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
611   PetscFunctionReturn(0);
612 }
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
616 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
617 {
618   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
619   int        ierr;
620 
621   PetscFunctionBegin;
622   /* do nondiagonal part */
623   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
624   /* send it on its way */
625   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
626   /* do local part */
627   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
628   /* receive remote parts: note this assumes the values are not actually */
629   /* inserted in zz until the VecScatterEnd() below, which is true for this    */
630   /* implementation but is perhaps not always true. */
631   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636   This only works correctly for square matrices where the subblock A->A is the
637    diagonal block
638 */
639 #undef __FUNC__
640 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
641 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
642 {
643   int        ierr;
644   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
645 
646   PetscFunctionBegin;
647   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
648   if (a->rstart != a->cstart || a->rend != a->cend) {
649     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
650   }
651   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNC__
656 #define __FUNC__ "MatScale_MPIAIJ"
657 int MatScale_MPIAIJ(Scalar *aa,Mat A)
658 {
659   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
660   int        ierr;
661 
662   PetscFunctionBegin;
663   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
664   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatDestroy_MPIAIJ"
670 int MatDestroy_MPIAIJ(Mat mat)
671 {
672   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
673   int        ierr;
674 
675   PetscFunctionBegin;
676   if (--mat->refct > 0) PetscFunctionReturn(0);
677 
678   if (mat->mapping) {
679     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
680   }
681   if (mat->bmapping) {
682     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
683   }
684   if (mat->rmap) {
685     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
686   }
687   if (mat->cmap) {
688     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
689   }
690 #if defined(USE_PETSC_LOG)
691   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
692 #endif
693   ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr);
694   PetscFree(aij->rowners);
695   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
696   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
697 #if defined (USE_CTABLE)
698   if (aij->colmap) TableDelete(aij->colmap);
699 #else
700   if (aij->colmap) PetscFree(aij->colmap);
701 #endif
702   if (aij->garray) PetscFree(aij->garray);
703   if (aij->lvec)   VecDestroy(aij->lvec);
704   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
705   if (aij->rowvalues) PetscFree(aij->rowvalues);
706   PetscFree(aij);
707   PLogObjectDestroy(mat);
708   PetscHeaderDestroy(mat);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNC__
713 #define __FUNC__ "MatView_MPIAIJ_Binary"
714 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
715 {
716   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
717   int         ierr;
718 
719   PetscFunctionBegin;
720   if (aij->size == 1) {
721     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
722   }
723   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNC__
728 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
729 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
730 {
731   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
732   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
733   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
734   int         size = aij->size;
735   FILE        *fd;
736   ViewerType  vtype;
737 
738   PetscFunctionBegin;
739   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
740   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
741     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
742     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
743       MatInfo info;
744       int     flg;
745       MPI_Comm_rank(mat->comm,&rank);
746       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
747       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
748       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
749       PetscSequentialPhaseBegin(mat->comm,1);
750       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
751          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
752       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
753          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
754       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
755       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
756       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
757       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
758       fflush(fd);
759       PetscSequentialPhaseEnd(mat->comm,1);
760       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
761       PetscFunctionReturn(0);
762     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
763       PetscFunctionReturn(0);
764     }
765   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
766     Draw       draw;
767     PetscTruth isnull;
768     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
769     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
770   }
771 
772   if (size == 1) {
773     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
774   } else {
775     /* assemble the entire matrix onto first processor. */
776     Mat         A;
777     Mat_SeqAIJ *Aloc;
778     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
779     Scalar      *a;
780 
781     if (!rank) {
782       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
783     } else {
784       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
785     }
786     PLogObjectParent(mat,A);
787 
788     /* copy over the A part */
789     Aloc = (Mat_SeqAIJ*) aij->A->data;
790     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
791     row = aij->rstart;
792     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
793     for ( i=0; i<m; i++ ) {
794       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
795       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
796     }
797     aj = Aloc->j;
798     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
799 
800     /* copy over the B part */
801     Aloc = (Mat_SeqAIJ*) aij->B->data;
802     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
803     row = aij->rstart;
804     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
805     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
806     for ( i=0; i<m; i++ ) {
807       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
808       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
809     }
810     PetscFree(ct);
811     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
812     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
813     /*
814        Everyone has to call MatView() to draw the matrix since the graphics waits are
815        synchronized across all processors that share the Draw object
816     */
817     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
818       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
819     }
820     ierr = MatDestroy(A); CHKERRQ(ierr);
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNC__
826 #define __FUNC__ "MatView_MPIAIJ"
827 int MatView_MPIAIJ(Mat mat,Viewer viewer)
828 {
829   int         ierr;
830   ViewerType  vtype;
831 
832   PetscFunctionBegin;
833   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
834   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
835       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
836     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
837   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
838     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
839   } else {
840     SETERRQ(1,1,"Viewer type not supported by PETSc object");
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 /*
846     This has to provide several versions.
847 
848      2) a) use only local smoothing, updating outer values only once.
849         b) local smoothing, updating outer values each inner iteration
850      3) colored updating, exchanging outer values between colors.
851 */
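/*
   A sketch of the per-row update performed by the local sweeps below (assuming,
   as its name suggests, that SPARSEDENSEMDOT() subtracts the row/vector
   products from sum):

      x_i <- (1-omega)*x_i + omega*( b_i - sum_{j!=i} a_ij*x_j )/(fshift + a_ii)

   Entries of the off-diagonal block use the ghost values gathered into
   mat->lvec, which are held fixed during the local sweeps.
*/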
852 #undef __FUNC__
853 #define __FUNC__ "MatRelax_MPIAIJ"
854 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
855                            double fshift,int its,Vec xx)
856 {
857   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
858   Mat        AA = mat->A, BB = mat->B;
859   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
860   Scalar     *b,*x,*xs,*ls,d,*v,sum;
861   int        ierr,*idx, *diag;
862   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
863 
864   PetscFunctionBegin;
865   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
866   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
867   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
868   xs = x + shift; /* shift by one for index start of 1 */
869   ls = ls + shift;
870   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
871   diag = A->diag;
872   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
873     if (flag & SOR_ZERO_INITIAL_GUESS) {
874       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
875       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
876       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
877       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
878       PetscFunctionReturn(0);
879     }
880     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
881     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
882     while (its--) {
883       /* go down through the rows */
884       for ( i=0; i<m; i++ ) {
885         n    = A->i[i+1] - A->i[i];
886 	PLogFlops(4*n+3);
887         idx  = A->j + A->i[i] + shift;
888         v    = A->a + A->i[i] + shift;
889         sum  = b[i];
890         SPARSEDENSEMDOT(sum,xs,v,idx,n);
891         d    = fshift + A->a[diag[i]+shift];
892         n    = B->i[i+1] - B->i[i];
893         idx  = B->j + B->i[i] + shift;
894         v    = B->a + B->i[i] + shift;
895         SPARSEDENSEMDOT(sum,ls,v,idx,n);
896         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
897       }
898       /* come up through the rows */
899       for ( i=m-1; i>-1; i-- ) {
900         n    = A->i[i+1] - A->i[i];
901 	PLogFlops(4*n+3);
902         idx  = A->j + A->i[i] + shift;
903         v    = A->a + A->i[i] + shift;
904         sum  = b[i];
905         SPARSEDENSEMDOT(sum,xs,v,idx,n);
906         d    = fshift + A->a[diag[i]+shift];
907         n    = B->i[i+1] - B->i[i];
908         idx  = B->j + B->i[i] + shift;
909         v    = B->a + B->i[i] + shift;
910         SPARSEDENSEMDOT(sum,ls,v,idx,n);
911         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
912       }
913     }
914   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
915     if (flag & SOR_ZERO_INITIAL_GUESS) {
916       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
917       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
918       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
919       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
920       PetscFunctionReturn(0);
921     }
922     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
923     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
924     while (its--) {
925       for ( i=0; i<m; i++ ) {
926         n    = A->i[i+1] - A->i[i];
927 	PLogFlops(4*n+3);
928         idx  = A->j + A->i[i] + shift;
929         v    = A->a + A->i[i] + shift;
930         sum  = b[i];
931         SPARSEDENSEMDOT(sum,xs,v,idx,n);
932         d    = fshift + A->a[diag[i]+shift];
933         n    = B->i[i+1] - B->i[i];
934         idx  = B->j + B->i[i] + shift;
935         v    = B->a + B->i[i] + shift;
936         SPARSEDENSEMDOT(sum,ls,v,idx,n);
937         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
938       }
939     }
940   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
941     if (flag & SOR_ZERO_INITIAL_GUESS) {
942       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
943       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
944       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
945       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
946       PetscFunctionReturn(0);
947     }
948     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
949     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
950     while (its--) {
951       for ( i=m-1; i>-1; i-- ) {
952         n    = A->i[i+1] - A->i[i];
953 	PLogFlops(4*n+3);
954         idx  = A->j + A->i[i] + shift;
955         v    = A->a + A->i[i] + shift;
956         sum  = b[i];
957         SPARSEDENSEMDOT(sum,xs,v,idx,n);
958         d    = fshift + A->a[diag[i]+shift];
959         n    = B->i[i+1] - B->i[i];
960         idx  = B->j + B->i[i] + shift;
961         v    = B->a + B->i[i] + shift;
962         SPARSEDENSEMDOT(sum,ls,v,idx,n);
963         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
964       }
965     }
966   } else {
967     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
968   }
969   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
970   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
971   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNC__
976 #define __FUNC__ "MatGetInfo_MPIAIJ"
977 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
978 {
979   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
980   Mat        A = mat->A, B = mat->B;
981   int        ierr;
982   double     isend[5], irecv[5];
983 
984   PetscFunctionBegin;
985   info->block_size     = 1.0;
986   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
987   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
988   isend[3] = info->memory;  isend[4] = info->mallocs;
989   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
990   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
991   isend[3] += info->memory;  isend[4] += info->mallocs;
992   if (flag == MAT_LOCAL) {
993     info->nz_used      = isend[0];
994     info->nz_allocated = isend[1];
995     info->nz_unneeded  = isend[2];
996     info->memory       = isend[3];
997     info->mallocs      = isend[4];
998   } else if (flag == MAT_GLOBAL_MAX) {
999     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1000     info->nz_used      = irecv[0];
1001     info->nz_allocated = irecv[1];
1002     info->nz_unneeded  = irecv[2];
1003     info->memory       = irecv[3];
1004     info->mallocs      = irecv[4];
1005   } else if (flag == MAT_GLOBAL_SUM) {
1006     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1007     info->nz_used      = irecv[0];
1008     info->nz_allocated = irecv[1];
1009     info->nz_unneeded  = irecv[2];
1010     info->memory       = irecv[3];
1011     info->mallocs      = irecv[4];
1012   }
1013   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1014   info->fill_ratio_needed = 0;
1015   info->factor_mallocs    = 0;
1016   info->rows_global       = (double)mat->M;
1017   info->columns_global    = (double)mat->N;
1018   info->rows_local        = (double)mat->m;
1019   info->columns_local     = (double)mat->n;
1020 
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNC__
1025 #define __FUNC__ "MatSetOption_MPIAIJ"
1026 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1027 {
1028   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1029   int        ierr;
1030 
1031   PetscFunctionBegin;
1032   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1033       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1034       op == MAT_COLUMNS_UNSORTED ||
1035       op == MAT_COLUMNS_SORTED ||
1036       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1037       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1038         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1039         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1040   } else if (op == MAT_ROW_ORIENTED) {
1041     a->roworiented = 1;
1042     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1043     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1044   } else if (op == MAT_ROWS_SORTED ||
1045              op == MAT_ROWS_UNSORTED ||
1046              op == MAT_SYMMETRIC ||
1047              op == MAT_STRUCTURALLY_SYMMETRIC ||
1048              op == MAT_YES_NEW_DIAGONALS)
1049     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1050   else if (op == MAT_COLUMN_ORIENTED) {
1051     a->roworiented = 0;
1052     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1053     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1054   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1055     a->donotstash = 1;
1056   } else if (op == MAT_NO_NEW_DIAGONALS){
1057     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1058   } else {
1059     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNC__
1065 #define __FUNC__ "MatGetSize_MPIAIJ"
1066 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1067 {
1068   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1069 
1070   PetscFunctionBegin;
1071   if (m) *m = mat->M;
1072   if (n) *n = mat->N;
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNC__
1077 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1078 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1079 {
1080   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1081 
1082   PetscFunctionBegin;
1083   if (m) *m = mat->m;
1084   if (n) *n = mat->n;
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNC__
1089 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1090 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1091 {
1092   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1093 
1094   PetscFunctionBegin;
1095   *m = mat->rstart; *n = mat->rend;
1096   PetscFunctionReturn(0);
1097 }
1098 
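/*
   MatGetRow for the parallel format: the requested row is obtained from both the
   diagonal and the off-diagonal block and the two pieces are merged in
   increasing global column order; diagonal-block columns are offset by cstart
   and off-diagonal columns are mapped through garray.  The merged row is copied
   into the scratch arrays rowvalues/rowindices, which are sized to hold the
   longest local row.
*/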
1099 #undef __FUNC__
1100 #define __FUNC__ "MatGetRow_MPIAIJ"
1101 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1104   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1105   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1106   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1107   int        *cmap, *idx_p;
1108 
1109   PetscFunctionBegin;
1110   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1111   mat->getrowactive = PETSC_TRUE;
1112 
1113   if (!mat->rowvalues && (idx || v)) {
1114     /*
1115         allocate enough space to hold information from the longest row.
1116     */
1117     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1118     int     max = 1,m = mat->m,tmp;
1119     for ( i=0; i<m; i++ ) {
1120       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1121       if (max < tmp) { max = tmp; }
1122     }
1123     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1124     mat->rowindices = (int *) (mat->rowvalues + max);
1125   }
1126 
1127   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1128   lrow = row - rstart;
1129 
1130   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1131   if (!v)   {pvA = 0; pvB = 0;}
1132   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1133   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1134   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1135   nztot = nzA + nzB;
1136 
1137   cmap  = mat->garray;
1138   if (v  || idx) {
1139     if (nztot) {
1140       /* Sort by increasing column numbers, assuming A and B already sorted */
1141       int imark = -1;
1142       if (v) {
1143         *v = v_p = mat->rowvalues;
1144         for ( i=0; i<nzB; i++ ) {
1145           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1146           else break;
1147         }
1148         imark = i;
1149         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1150         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1151       }
1152       if (idx) {
1153         *idx = idx_p = mat->rowindices;
1154         if (imark > -1) {
1155           for ( i=0; i<imark; i++ ) {
1156             idx_p[i] = cmap[cworkB[i]];
1157           }
1158         } else {
1159           for ( i=0; i<nzB; i++ ) {
1160             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1161             else break;
1162           }
1163           imark = i;
1164         }
1165         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1166         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1167       }
1168     } else {
1169       if (idx) *idx = 0;
1170       if (v)   *v   = 0;
1171     }
1172   }
1173   *nz = nztot;
1174   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1175   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNC__
1180 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1181 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1182 {
1183   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1184 
1185   PetscFunctionBegin;
1186   if (aij->getrowactive == PETSC_FALSE) {
1187     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1188   }
1189   aij->getrowactive = PETSC_FALSE;
1190   PetscFunctionReturn(0);
1191 }
1192 
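/*
   Norms are formed from the local blocks and combined with MPI_Allreduce():
   the Frobenius norm sums the squares of all local entries (MPI_SUM, then a
   square root); the 1-norm accumulates column sums of absolute values into a
   work array of global length N (non-scalable) and takes the largest; the
   infinity norm takes the maximum over the local row sums (MPI_MAX).
*/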
1193 #undef __FUNC__
1194 #define __FUNC__ "MatNorm_MPIAIJ"
1195 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1196 {
1197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1198   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1199   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1200   double     sum = 0.0;
1201   Scalar     *v;
1202 
1203   PetscFunctionBegin;
1204   if (aij->size == 1) {
1205     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1206   } else {
1207     if (type == NORM_FROBENIUS) {
1208       v = amat->a;
1209       for (i=0; i<amat->nz; i++ ) {
1210 #if defined(USE_PETSC_COMPLEX)
1211         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1212 #else
1213         sum += (*v)*(*v); v++;
1214 #endif
1215       }
1216       v = bmat->a;
1217       for (i=0; i<bmat->nz; i++ ) {
1218 #if defined(USE_PETSC_COMPLEX)
1219         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1220 #else
1221         sum += (*v)*(*v); v++;
1222 #endif
1223       }
1224       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1225       *norm = sqrt(*norm);
1226     } else if (type == NORM_1) { /* max column norm */
1227       double *tmp, *tmp2;
1228       int    *jj, *garray = aij->garray;
1229       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1230       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1231       PetscMemzero(tmp,aij->N*sizeof(double));
1232       *norm = 0.0;
1233       v = amat->a; jj = amat->j;
1234       for ( j=0; j<amat->nz; j++ ) {
1235         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1236       }
1237       v = bmat->a; jj = bmat->j;
1238       for ( j=0; j<bmat->nz; j++ ) {
1239         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1240       }
1241       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1242       for ( j=0; j<aij->N; j++ ) {
1243         if (tmp2[j] > *norm) *norm = tmp2[j];
1244       }
1245       PetscFree(tmp); PetscFree(tmp2);
1246     } else if (type == NORM_INFINITY) { /* max row norm */
1247       double ntemp = 0.0;
1248       for ( j=0; j<amat->m; j++ ) {
1249         v = amat->a + amat->i[j] + shift;
1250         sum = 0.0;
1251         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1252           sum += PetscAbsScalar(*v); v++;
1253         }
1254         v = bmat->a + bmat->i[j] + shift;
1255         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1256           sum += PetscAbsScalar(*v); v++;
1257         }
1258         if (sum > ntemp) ntemp = sum;
1259       }
1260       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1261     } else {
1262       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1263     }
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
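/*
   The transpose is formed by creating a new parallel matrix with the row and
   column distributions interchanged and inserting each local row of A and B as
   a column of the new matrix with MatSetValues(); even when matout ==
   PETSC_NULL this is not a true in-place algorithm (see the comment near the
   end of the routine).
*/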
1268 #undef __FUNC__
1269 #define __FUNC__ "MatTranspose_MPIAIJ"
1270 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1271 {
1272   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1273   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1274   int        ierr,shift = Aloc->indexshift;
1275   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1276   Mat        B;
1277   Scalar     *array;
1278 
1279   PetscFunctionBegin;
1280   if (matout == PETSC_NULL && M != N) {
1281     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1282   }
1283 
1284   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1285 
1286   /* copy over the A part */
1287   Aloc = (Mat_SeqAIJ*) a->A->data;
1288   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1289   row = a->rstart;
1290   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1291   for ( i=0; i<m; i++ ) {
1292     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1293     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1294   }
1295   aj = Aloc->j;
1296   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1297 
1298   /* copy over the B part */
1299   Aloc = (Mat_SeqAIJ*) a->B->data;
1300   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1301   row = a->rstart;
1302   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1303   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1304   for ( i=0; i<m; i++ ) {
1305     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1306     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1307   }
1308   PetscFree(ct);
1309   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1310   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1311   if (matout != PETSC_NULL) {
1312     *matout = B;
1313   } else {
1314     PetscOps       *Abops;
1315     struct _MatOps *Aops;
1316 
1317     /* This isn't really an in-place transpose .... but we do free the data structures of a */
1318     PetscFree(a->rowners);
1319     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1320     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1321 #if defined (USE_CTABLE)
1322     if (a->colmap) TableDelete(a->colmap);
1323 #else
1324     if (a->colmap) PetscFree(a->colmap);
1325 #endif
1326     if (a->garray) PetscFree(a->garray);
1327     if (a->lvec) VecDestroy(a->lvec);
1328     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1329     PetscFree(a);
1330 
1331     /*
1332        This is horrible, horrible code. We need to keep the
1333       A pointers for the bops and ops but copy everything
1334       else from B.
1335     */
1336     Abops = A->bops;
1337     Aops  = A->ops;
1338     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1339     A->bops = Abops;
1340     A->ops  = Aops;
1341     PetscHeaderDestroy(B);
1342   }
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNC__
1347 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1348 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1349 {
1350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1351   Mat a = aij->A, b = aij->B;
1352   int ierr,s1,s2,s3;
1353 
1354   PetscFunctionBegin;
1355   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1356   if (rr) {
1357     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1358     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1359     /* Overlap communication with computation. */
1360     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1361   }
1362   if (ll) {
1363     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1364     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1365     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1366   }
1367   /* scale the diagonal block */
1368   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1369 
1370   if (rr) {
1371     /* Do a scatter end and then right scale the off-diagonal block */
1372     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1373     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1374   }
1375 
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNC__
1381 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1382 int MatPrintHelp_MPIAIJ(Mat A)
1383 {
1384   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1385   int        ierr;
1386 
1387   PetscFunctionBegin;
1388   if (!a->rank) {
1389     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #undef __FUNC__
1395 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1396 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1397 {
1398   PetscFunctionBegin;
1399   *bs = 1;
1400   PetscFunctionReturn(0);
1401 }
1402 #undef __FUNC__
1403 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1404 int MatSetUnfactored_MPIAIJ(Mat A)
1405 {
1406   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1407   int        ierr;
1408 
1409   PetscFunctionBegin;
1410   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNC__
1415 #define __FUNC__ "MatEqual_MPIAIJ"
1416 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1417 {
1418   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1419   Mat        a, b, c, d;
1420   PetscTruth flg;
1421   int        ierr;
1422 
1423   PetscFunctionBegin;
1424   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1425   a = matA->A; b = matA->B;
1426   c = matB->A; d = matB->B;
1427 
1428   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1429   if (flg == PETSC_TRUE) {
1430     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1431   }
1432   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNC__
1437 #define __FUNC__ "MatCopy_MPIAIJ"
1438 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1439 {
1440   int        ierr;
1441   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1442   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1443 
1444   PetscFunctionBegin;
1445   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1446     /* because of the column compression in the off-processor part of the matrix a->B,
1447        the number of columns in a->B and b->B may be different, hence we cannot call
1448        MatCopy() directly on the two parts. If need be, we could provide a copy more
1449        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1450        then copying the submatrices */
1451     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1452   } else {
1453     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1454     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1460 extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1461 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1462 extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
1463 extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1464 
1465 /* -------------------------------------------------------------------*/
1466 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1467        MatGetRow_MPIAIJ,
1468        MatRestoreRow_MPIAIJ,
1469        MatMult_MPIAIJ,
1470        MatMultAdd_MPIAIJ,
1471        MatMultTrans_MPIAIJ,
1472        MatMultTransAdd_MPIAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477        0,
1478        0,
1479        MatRelax_MPIAIJ,
1480        MatTranspose_MPIAIJ,
1481        MatGetInfo_MPIAIJ,
1482        MatEqual_MPIAIJ,
1483        MatGetDiagonal_MPIAIJ,
1484        MatDiagonalScale_MPIAIJ,
1485        MatNorm_MPIAIJ,
1486        MatAssemblyBegin_MPIAIJ,
1487        MatAssemblyEnd_MPIAIJ,
1488        0,
1489        MatSetOption_MPIAIJ,
1490        MatZeroEntries_MPIAIJ,
1491        MatZeroRows_MPIAIJ,
1492        0,
1493        0,
1494        0,
1495        0,
1496        MatGetSize_MPIAIJ,
1497        MatGetLocalSize_MPIAIJ,
1498        MatGetOwnershipRange_MPIAIJ,
1499        0,
1500        0,
1501        0,
1502        0,
1503        MatDuplicate_MPIAIJ,
1504        0,
1505        0,
1506        0,
1507        0,
1508        0,
1509        MatGetSubMatrices_MPIAIJ,
1510        MatIncreaseOverlap_MPIAIJ,
1511        MatGetValues_MPIAIJ,
1512        MatCopy_MPIAIJ,
1513        MatPrintHelp_MPIAIJ,
1514        MatScale_MPIAIJ,
1515        0,
1516        0,
1517        0,
1518        MatGetBlockSize_MPIAIJ,
1519        0,
1520        0,
1521        0,
1522        0,
1523        MatFDColoringCreate_MPIAIJ,
1524        0,
1525        MatSetUnfactored_MPIAIJ,
1526        0,
1527        0,
1528        MatGetSubMatrix_MPIAIJ,
1529        0,
1530        0,
1531        MatGetMaps_Petsc};
1532 
1533 /* ----------------------------------------------------------------------------------------*/
1534 
1535 EXTERN_C_BEGIN
1536 #undef __FUNC__
1537 #define __FUNC__ "MatStoreValues_MPIAIJ"
1538 int MatStoreValues_MPIAIJ(Mat mat)
1539 {
1540   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1541   int        ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1545   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 EXTERN_C_END
1549 
1550 EXTERN_C_BEGIN
1551 #undef __FUNC__
1552 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1553 int MatRetrieveValues_MPIAIJ(Mat mat)
1554 {
1555   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1556   int        ierr;
1557 
1558   PetscFunctionBegin;
1559   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1560   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 EXTERN_C_END
1564 
1565 #include "pc.h"
1566 EXTERN_C_BEGIN
1567 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1568 EXTERN_C_END
1569 
1570 #undef __FUNC__
1571 #define __FUNC__ "MatCreateMPIAIJ"
1572 /*@C
1573    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1574    (the default parallel PETSc format).  For good matrix assembly performance
1575    the user should preallocate the matrix storage by setting the parameters
1576    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1577    performance can be increased by more than a factor of 50.
1578 
1579    Collective on MPI_Comm
1580 
1581    Input Parameters:
1582 +  comm - MPI communicator
1583 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1584            This value should be the same as the local size used in creating the
1585            y vector for the matrix-vector product y = Ax.
1586 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1587        This value should be the same as the local size used in creating the
1588        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1589 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1590 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1591 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1592            (same value is used for all local rows)
1593 .  d_nnz - array containing the number of nonzeros in the various rows of the
1594            DIAGONAL portion of the local submatrix (possibly different for each row)
1595            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1596            The size of this array is equal to the number of local rows, i.e., 'm'.
1597            You must leave room for the diagonal entry even if it is zero.
1598 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1599            submatrix (same value is used for all local rows).
1600 -  o_nnz - array containing the number of nonzeros in the various rows of the
1601            OFF-DIAGONAL portion of the local submatrix (possibly different for
1602            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1603            structure. The size of this array is equal to the number
1604            of local rows, i.e., 'm'.
1605 
1606    Output Parameter:
1607 .  A - the matrix
1608 
1609    Notes:
1610    The m, n, M, and N parameters specify the size of the matrix and its partitioning
1611    across processors, while d_nz, d_nnz, o_nz, and o_nnz specify the approximate
1612    storage requirements for this matrix.
1613 
1614    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1615    processor then it must be used on all processors that share the object for
1616    that argument.
1617 
1618    The AIJ format (also called the Yale sparse matrix format or
1619    compressed row storage) is fully compatible with standard Fortran 77
1620    storage.  That is, the stored row and column indices can begin at
1621    either one (as in Fortran) or zero.  See the users manual for details.
1622 
1623    The user MUST specify either the local or global matrix dimensions
1624    (possibly both).
1625 
1626    The parallel matrix is partitioned such that the first m0 rows belong to
1627    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1628    to process 2, etc., where m0, m1, m2,... are the values of the input parameter 'm' on each process.
1629 
1630    The DIAGONAL portion of the local submatrix of a processor can be defined
1631    as the submatrix which is obtained by extracting the part corresponding
1632    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1633    first row that belongs to the processor, and r2 is the last row belonging
1634    to this processor. This is a square mxm matrix. The remaining portion
1635    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1636 
1637    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1638 
1639    By default, this format uses inodes (identical nodes) when possible.
1640    We search for consecutive rows with the same nonzero structure, thereby
1641    reusing matrix information to achieve increased efficiency.
1642 
1643    Options Database Keys:
1644 +  -mat_aij_no_inode  - Do not use inodes
1645 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1646 .  -mat_aij_oneindex - Internally use indexing starting at 1
1647         rather than 0.  Note that when calling MatSetValues(),
1648         the user still MUST index entries starting at 0!
1649 -  -mat_mpi - use the parallel matrix data structures even on one processor
1650                (defaults to using SeqAIJ format on one processor)
1653 
1654 
1655    Example usage:
1656 
1657    Consider the following 8x8 matrix with 34 nonzero values that is
1658    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1659    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
1660    as follows:
1661 
1662 .vb
1663             1  2  0  |  0  3  0  |  0  4
1664     Proc0   0  5  6  |  7  0  0  |  8  0
1665             9  0 10  | 11  0  0  | 12  0
1666     -------------------------------------
1667            13  0 14  | 15 16 17  |  0  0
1668     Proc1   0 18  0  | 19 20 21  |  0  0
1669             0  0  0  | 22 23  0  | 24  0
1670     -------------------------------------
1671     Proc2  25 26 27  |  0  0 28  | 29  0
1672            30  0  0  | 31 32 33  |  0 34
1673 .ve
1674 
1675    This can be represented as a collection of submatrices as:
1676 
1677 .vb
1678       A B C
1679       D E F
1680       G H I
1681 .ve
1682 
1683    where the submatrices A,B,C are owned by proc0, D,E,F are
1684    owned by proc1, and G,H,I are owned by proc2.
1685 
1686    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1687    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1688    The 'M','N' parameters are 8,8, and have the same values on all procs.
1689 
1690    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1691    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1692    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1693    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
1694    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1695    matrix, and [DF] as another SeqAIJ matrix.
1696 
1697    When the d_nz, o_nz parameters are specified, d_nz storage elements are
1698    allocated for every row of the local DIAGONAL submatrix, and o_nz
1699    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1700    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
1701    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
1702    In this case, the values of d_nz,o_nz are:
1703 .vb
1704      proc0 : d_nz = 2, o_nz = 2
1705      proc1 : d_nz = 3, o_nz = 2
1706      proc2 : d_nz = 1, o_nz = 4
1707 .ve
1708    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1709    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
1710    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1711    34 values.
1712 
1713    When the d_nnz, o_nnz parameters are specified, the storage is specified
1714    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
1715    In the above case the values for d_nnz,o_nnz are:
1716 .vb
1717      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1718      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1719      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1720 .ve
1721    Here the space allocated is the sum of all the above values, i.e., 34, and
1722    hence the preallocation is perfect.
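
   As an illustration only (a minimal usage sketch, not taken from an actual PETSc example),
   proc1 in the layout above could create its piece of the matrix with the d_nnz/o_nnz
   values listed:

.vb
     int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Each of the other processes makes the same call with its own m, n, d_nnz, and o_nnz values.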
1723 
1724    Level: intermediate
1725 
1726 .keywords: matrix, aij, compressed row, sparse, parallel
1727 
1728 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1729 @*/
1730 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1731 {
1732   Mat          B;
1733   Mat_MPIAIJ   *b;
1734   int          ierr, i,size,flag1 = 0, flag2 = 0;
1735 
1736   PetscFunctionBegin;
1737   MPI_Comm_size(comm,&size);
1738   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
1739   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1740   if (!flag1 && !flag2 && size == 1) {
1741     if (M == PETSC_DECIDE) M = m;
1742     if (N == PETSC_DECIDE) N = n;
1743     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1744     PetscFunctionReturn(0);
1745   }
1746 
1747   *A = 0;
1748   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1749   PLogObjectCreate(B);
1750   B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1751   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1752   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1753   B->ops->destroy    = MatDestroy_MPIAIJ;
1754   B->ops->view       = MatView_MPIAIJ;
1755   B->factor          = 0;
1756   B->assembled       = PETSC_FALSE;
1757   B->mapping         = 0;
1758 
1759   B->insertmode      = NOT_SET_VALUES;
1760   b->size            = size;
1761   MPI_Comm_rank(comm,&b->rank);
1762 
1763   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1764     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1765   }
1766 
1767   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1768   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1769   b->m = m; B->m = m;
1770   b->n = n; B->n = n;
1771   b->N = N; B->N = N;
1772   b->M = M; B->M = M;
1773 
1774   /* the information in the maps duplicates the information computed below, eventually
1775      we should remove the duplicate information that is not contained in the maps */
1776   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1777   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1778 
1779   /* build local table of row and column ownerships */
1780   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1781   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1782   b->cowners = b->rowners + b->size + 2;
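  /* gather each process's local row count, then convert the counts into a running sum so
     that rowners[rank] and rowners[rank+1] bound the rows owned by this process; the same
     is done for the column ownership in cowners below */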
1783   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1784   b->rowners[0] = 0;
1785   for ( i=2; i<=b->size; i++ ) {
1786     b->rowners[i] += b->rowners[i-1];
1787   }
1788   b->rstart = b->rowners[b->rank];
1789   b->rend   = b->rowners[b->rank+1];
1790   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1791   b->cowners[0] = 0;
1792   for ( i=2; i<=b->size; i++ ) {
1793     b->cowners[i] += b->cowners[i-1];
1794   }
1795   b->cstart = b->cowners[b->rank];
1796   b->cend   = b->cowners[b->rank+1];
1797 
1798   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1799   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1800   PLogObjectParent(B,b->A);
1801   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1802   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1803   PLogObjectParent(B,b->B);
1804 
1805   /* build cache (stash) for off-processor entries generated during MatSetValues() */
1806   ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr);
1807   b->donotstash  = 0;
1808   b->colmap      = 0;
1809   b->garray      = 0;
1810   b->roworiented = 1;
1811 
1812   /* stuff used for matrix vector multiply */
1813   b->lvec      = 0;
1814   b->Mvctx     = 0;
1815 
1816   /* stuff for MatGetRow() */
1817   b->rowindices   = 0;
1818   b->rowvalues    = 0;
1819   b->getrowactive = PETSC_FALSE;
1820 
1821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1822                                      "MatStoreValues_MPIAIJ",
1823                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1825                                      "MatRetrieveValues_MPIAIJ",
1826                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1828                                      "MatGetDiagonalBlock_MPIAIJ",
1829                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1830   *A = B;
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNC__
1835 #define __FUNC__ "MatDuplicate_MPIAIJ"
1836 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1837 {
1838   Mat        mat;
1839   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1840   int        ierr, len=0, flg;
1841 
1842   PetscFunctionBegin;
1843   *newmat       = 0;
1844   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1845   PLogObjectCreate(mat);
1846   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1847   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1848   mat->ops->destroy = MatDestroy_MPIAIJ;
1849   mat->ops->view    = MatView_MPIAIJ;
1850   mat->factor       = matin->factor;
1851   mat->assembled    = PETSC_TRUE;
1852   mat->insertmode   = NOT_SET_VALUES;
1853 
1854   a->m = mat->m   = oldmat->m;
1855   a->n = mat->n   = oldmat->n;
1856   a->M = mat->M   = oldmat->M;
1857   a->N = mat->N   = oldmat->N;
1858 
1859   a->rstart       = oldmat->rstart;
1860   a->rend         = oldmat->rend;
1861   a->cstart       = oldmat->cstart;
1862   a->cend         = oldmat->cend;
1863   a->size         = oldmat->size;
1864   a->rank         = oldmat->rank;
1865   a->donotstash   = oldmat->donotstash;
1866   a->roworiented  = oldmat->roworiented;
1867   a->rowindices   = 0;
1868   a->rowvalues    = 0;
1869   a->getrowactive = PETSC_FALSE;
1870 
1871   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1872   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1873   a->cowners = a->rowners + a->size + 2;
1874   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1875   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr);
1876   if (oldmat->colmap) {
1877 #if defined (USE_CTABLE)
1878     ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
1879 #else
1880     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1881     PLogObjectMemory(mat,(a->N)*sizeof(int));
1882     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1883 #endif
1884   } else a->colmap = 0;
1885   if (oldmat->garray) {
1886     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1887     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1888     PLogObjectMemory(mat,len*sizeof(int));
1889     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1890   } else a->garray = 0;
1891 
1892   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1893   PLogObjectParent(mat,a->lvec);
1894   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1895   PLogObjectParent(mat,a->Mvctx);
1896   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1897   PLogObjectParent(mat,a->A);
1898   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
1899   PLogObjectParent(mat,a->B);
1900   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1901   if (flg) {
1902     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1903   }
1904   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1905   *newmat = mat;
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #include "sys.h"
1910 
1911 #undef __FUNC__
1912 #define __FUNC__ "MatLoad_MPIAIJ"
1913 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1914 {
1915   Mat          A;
1916   Scalar       *vals,*svals;
1917   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1918   MPI_Status   status;
1919   int          i, nz, ierr, j,rstart, rend, fd;
1920   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1921   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1922   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1923 
1924   PetscFunctionBegin;
1925   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1926   if (!rank) {
1927     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1928     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1929     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1930     if (header[3] < 0) {
1931       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1932     }
1933   }
1934 
1935   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1936   M = header[1]; N = header[2];
1937   /* determine ownership of all rows */
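  /* each of the first M % size processes gets one extra row; e.g., M=10 on 3 processes
     gives local sizes 4,3,3 */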
1938   m = M/size + ((M % size) > rank);
1939   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1940   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1941   rowners[0] = 0;
1942   for ( i=2; i<=size; i++ ) {
1943     rowners[i] += rowners[i-1];
1944   }
1945   rstart = rowners[rank];
1946   rend   = rowners[rank+1];
1947 
1948   /* distribute row lengths to all processors */
1949   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1950   offlens = ourlens + (rend-rstart);
1951   if (!rank) {
1952     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1953     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1954     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1955     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1956     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1957     PetscFree(sndcounts);
1958   } else {
1959     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1960   }
1961 
1962   if (!rank) {
1963     /* calculate the number of nonzeros on each processor */
1964     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1965     PetscMemzero(procsnz,size*sizeof(int));
1966     for ( i=0; i<size; i++ ) {
1967       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1968         procsnz[i] += rowlengths[j];
1969       }
1970     }
1971     PetscFree(rowlengths);
1972 
1973     /* determine max buffer needed and allocate it */
1974     maxnz = 0;
1975     for ( i=0; i<size; i++ ) {
1976       maxnz = PetscMax(maxnz,procsnz[i]);
1977     }
1978     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1979 
1980     /* read in my part of the matrix column indices  */
1981     nz     = procsnz[0];
1982     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1983     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1984 
1985     /* read in everyone else's part and ship it off to them */
1986     for ( i=1; i<size; i++ ) {
1987       nz   = procsnz[i];
1988       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1989       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1990     }
1991     PetscFree(cols);
1992   } else {
1993     /* determine buffer space needed for message */
1994     nz = 0;
1995     for ( i=0; i<m; i++ ) {
1996       nz += ourlens[i];
1997     }
1998     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1999 
2000     /* receive message of column indices */
2001     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2002     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2003     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2004   }
2005 
2006   /* determine column ownership if matrix is not square */
2007   if (N != M) {
2008     n      = N/size + ((N % size) > rank);
2009     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2010     cstart = cend - n;
2011   } else {
2012     cstart = rstart;
2013     cend   = rend;
2014     n      = cend - cstart;
2015   }
2016 
2017   /* loop over local rows, determining number of off-diagonal entries */
2018   PetscMemzero(offlens,m*sizeof(int));
2019   jj = 0;
2020   for ( i=0; i<m; i++ ) {
2021     for ( j=0; j<ourlens[i]; j++ ) {
2022       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2023       jj++;
2024     }
2025   }
2026 
2027   /* create our matrix; ourlens temporarily holds only the diagonal-block row lengths for preallocation and is restored to the full row lengths below for MatSetValues() */
2028   for ( i=0; i<m; i++ ) {
2029     ourlens[i] -= offlens[i];
2030   }
2031   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2032   A = *newmat;
2033   ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
2034   for ( i=0; i<m; i++ ) {
2035     ourlens[i] += offlens[i];
2036   }
2037 
2038   if (!rank) {
2039     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2040 
2041     /* read in my part of the matrix numerical values  */
2042     nz   = procsnz[0];
2043     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2044 
2045     /* insert into matrix */
2046     jj      = rstart;
2047     smycols = mycols;
2048     svals   = vals;
2049     for ( i=0; i<m; i++ ) {
2050       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2051       smycols += ourlens[i];
2052       svals   += ourlens[i];
2053       jj++;
2054     }
2055 
2056     /* read in other processors and ship out */
2057     for ( i=1; i<size; i++ ) {
2058       nz   = procsnz[i];
2059       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2060       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2061     }
2062     PetscFree(procsnz);
2063   } else {
2064     /* receive numeric values */
2065     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2066 
2067     /* receive message of values */
2068     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2069     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2070     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2071 
2072     /* insert into matrix */
2073     jj      = rstart;
2074     smycols = mycols;
2075     svals   = vals;
2076     for ( i=0; i<m; i++ ) {
2077       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2078       smycols += ourlens[i];
2079       svals   += ourlens[i];
2080       jj++;
2081     }
2082   }
2083   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2084 
2085   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2086   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNC__
2091 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2092 /*
2093     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
2094   process locally, and then the end result formed by concatenating the local matrices.
2095   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2096 */
2097 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2098 {
2099   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2100   Mat        *local,M, Mreuse;
2101   Scalar     *vwork,*aa;
2102   MPI_Comm   comm = mat->comm;
2103   Mat_SeqAIJ *aij;
2104   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2105 
2106   PetscFunctionBegin;
2107   MPI_Comm_rank(comm,&rank);
2108   MPI_Comm_size(comm,&size);
2109 
2110   if (call ==  MAT_REUSE_MATRIX) {
2111     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2112     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2113     local = &Mreuse;
2114     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2115   } else {
2116     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2117     Mreuse = *local;
2118     PetscFree(local);
2119   }
2120 
2121   /*
2122       m - number of local rows
2123       n - number of columns (same on all processors)
2124       rstart - first row in new global matrix generated
2125   */
2126   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2127   if (call == MAT_INITIAL_MATRIX) {
2128     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2129     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2130     ii  = aij->i;
2131     jj  = aij->j;
2132 
2133     /*
2134         Determine the number of non-zeros in the diagonal and off-diagonal
2135         portions of the matrix in order to do correct preallocation
2136     */
2137 
2138     /* first get start and end of "diagonal" columns */
2139     if (csize == PETSC_DECIDE) {
2140       nlocal = n/size + ((n % size) > rank);
2141     } else {
2142       nlocal = csize;
2143     }
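    /* MPI_Scan returns the inclusive prefix sum of nlocal, so [rstart,rend) below is the
       range of "diagonal" columns this process will own in the new matrix */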
2144     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2145     rstart = rend - nlocal;
2146     if (rank == size - 1 && rend != n) {
2147       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2148     }
2149 
2150     /* next, compute all the lengths */
2151     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2152     olens = dlens + m;
2153     for ( i=0; i<m; i++ ) {
2154       jend = ii[i+1] - ii[i];
2155       olen = 0;
2156       dlen = 0;
2157       for ( j=0; j<jend; j++ ) {
2158         if ( *jj < rstart || *jj >= rend) olen++;
2159         else dlen++;
2160         jj++;
2161       }
2162       olens[i] = olen;
2163       dlens[i] = dlen;
2164     }
2165     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2166     PetscFree(dlens);
2167   } else {
2168     int ml,nl;
2169 
2170     M = *newmat;
2171     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2172     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2173     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2174     /*
2175          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2176        rather than the slower MatSetValues().
2177     */
2178     M->was_assembled = PETSC_TRUE;
2179     M->assembled     = PETSC_FALSE;
2180   }
2181   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2182   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2183   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2184   ii  = aij->i;
2185   jj  = aij->j;
2186   aa  = aij->a;
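  /* walk the CSR arrays (ii,jj,aa) of the local submatrix and insert each row into the
     distributed matrix, offset by rstart to get the global row number */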
2187   for (i=0; i<m; i++) {
2188     row   = rstart + i;
2189     nz    = ii[i+1] - ii[i];
2190     cwork = jj;     jj += nz;
2191     vwork = aa;     aa += nz;
2192     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2193   }
2194 
2195   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2196   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2197   *newmat = M;
2198 
2199   /* save the submatrix used on this processor for the next reuse request */
2200   if (call ==  MAT_INITIAL_MATRIX) {
2201     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2202     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2203   }
2204 
2205   PetscFunctionReturn(0);
2206 }
2207