xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b833fc0d6de9ffb7984b704efec3d68e8cc57cab)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.287 1999/03/11 22:30:32 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
/* local utility routine that creates a mapping from the global column
number to the local column number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way since the
length of colmap equals the global number of columns (in the non-CTABLE case).
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
/*
   CreateColmap_MPIAIJ_Private - Builds aij->colmap, which maps a global
   column number to (local off-diagonal column index)+1.  A lookup result
   of 0 (i.e. -1 after the caller subtracts 1) means "column not present
   in the off-diagonal block".
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;   /* n = number of columns in the off-diagonal (B) block */
#if defined (USE_CTABLE)
  int        ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* Hash-table variant: stores only the n entries actually present.
     Key and value are offset by 1 so that 0 can denote "absent". */
  ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one int per global column (non-scalable in aij->N). */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; /* +1 so 0 = absent */
#endif
  PetscFunctionReturn(0);
}
46 
47 #define CHUNKSIZE   15
48 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
49 { \
50  \
51     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
52     rmax = aimax[row]; nrow = ailen[row];  \
53     col1 = col - shift; \
54      \
55     low = 0; high = nrow; \
56     while (high-low > 5) { \
57       t = (low+high)/2; \
58       if (rp[t] > col) high = t; \
59       else             low  = t; \
60     } \
61       for ( _i=0; _i<nrow; _i++ ) { \
62         if (rp[_i] > col1) break; \
63         if (rp[_i] == col1) { \
64           if (addv == ADD_VALUES) ap[_i] += value;   \
65           else                  ap[_i] = value; \
66           goto a_noinsert; \
67         } \
68       }  \
69       if (nonew == 1) goto a_noinsert; \
70       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
71       if (nrow >= rmax) { \
72         /* there is no extra room in row, therefore enlarge */ \
73         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
74         Scalar *new_a; \
75  \
76         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
77  \
78         /* malloc new storage space */ \
79         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
80         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
81         new_j   = (int *) (new_a + new_nz); \
82         new_i   = new_j + new_nz; \
83  \
84         /* copy over old data into new slots */ \
85         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
86         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
87         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
88         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
89         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
90                                                            len*sizeof(int)); \
91         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
92         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
93                                                            len*sizeof(Scalar));  \
94         /* free up old matrix storage */ \
95  \
96         PetscFree(a->a);  \
97         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int)); \
162         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
163         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));  \
165         /* free up old matrix storage */ \
166  \
167         PetscFree(b->a);  \
168         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
169         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
170         b->singlemalloc = 1; \
171  \
172         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
173         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
174         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
175         b->maxnz += CHUNKSIZE; \
176         b->reallocs++; \
177       } \
178       N = nrow++ - 1; b->nz++; \
179       /* shift up all the later entries in this row */ \
180       for ( ii=N; ii>=_i; ii-- ) { \
181         rp[ii+1] = rp[ii]; \
182         ap[ii+1] = ap[ii]; \
183       } \
184       rp[_i] = col1;  \
185       ap[_i] = value;  \
186       b_noinsert: ; \
187       bilen[row] = nrow; \
188 }
189 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n logically dense block of
   values into the parallel AIJ matrix.

   Rows owned locally (in [rstart,rend)) are placed directly into the
   diagonal block (aij->A) or off-diagonal block (aij->B) via the inlined
   macros above; rows owned by other processors are stashed and communicated
   during MatAssemblyBegin/End (unless aij->donotstash is set, in which case
   they are dropped).  Negative row/column indices are silently skipped.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  /* NOTE(review): nonew/shift are read from the diagonal block only but are
     also used by the B-block macro -- assumes both SeqAIJ blocks share
     these settings; verify against the MPIAIJ creation routine. */
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;     /* local row index */
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column falls in the off-diagonal block; translate global
             column number to B's local numbering via colmap */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* brand-new off-diagonal location: disassemble so the
                 nonzero pattern can grow, then refresh the B aliases the
                 macro below relies on */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another processor: stash for assembly-time exchange */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = StashValuesRoworiented_Private(&aij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = StashValuesColumnoriented_Private(&aij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
272 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   MatGetValues_MPIAIJ - Retrieves an m-by-n block of values, stored
   row-major into v.  Only locally owned rows may be requested; columns in
   the local diagonal range are read from aij->A, all others from aij->B
   through the colmap lookup.  Positions with no locally stored entry are
   returned as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;   /* local row index */
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          /* off-diagonal block: map global column to B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* garray check guards against a stale/aliased colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
315 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - Starts shipping stashed (off-processor) values
   to their owning processors and checks that all processors agree on the
   insert mode.

   NOTE(review): when donotstash is set this returns before the
   MPI_Allreduce; assumes donotstash is set collectively on every
   processor, otherwise the reduction would hang -- verify at the
   option-setting site.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* (bitwise OR of the modes: both bits set means the modes were mixed) */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = StashScatterBegin_Private(&aij->stash,aij->rowners); CHKERRQ(ierr);
  ierr = StashGetInfo_Private(&aij->stash,&nstash,&reallocs); CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}
342 
343 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of values
   received from other processors, assembles both sequential blocks, keeps
   the disassembled state consistent across processors, and on the first
   final assembly sets up the scatter machinery used by MatMult.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed values from other processors and insert them locally */
    while (1) {
      ierr = StashScatterGetMesg_Private(&aij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&aij->stash); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: result is zero iff at least one
       processor has disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build garray/colmap and the MatMult scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* cached MatGetRow workspace is invalid after assembly */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
398 
399 #undef __FUNC__
400 #define __FUNC__ "MatZeroEntries_MPIAIJ"
401 int MatZeroEntries_MPIAIJ(Mat A)
402 {
403   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
404   int        ierr;
405 
406   PetscFunctionBegin;
407   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
408   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   MatZeroRows_MPIAIJ - Zeros the (globally numbered) rows listed in `is`,
   optionally placing `diag` on the diagonal of each zeroed row.

   Any processor may list any global row, so the routine first routes each
   requested row index to its owner with point-to-point MPI messages, then
   zaps the locally owned rows in both sequential blocks.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #rows destined for proc j; procs[j] = 1 if any; owner[i]
     records the destination of each requested row */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* after the reductions: work[rank] = #messages I will receive, then
     the maximum message length anyone sends */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination (starts[] is consumed here) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  /* NOTE(review): return code of ISRestoreIndices is ignored, unlike every
     other PETSc call in this routine -- consider CHKERRQ */
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (it was destroyed by the bucketing above) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];  /* first globally numbered row owned locally */

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  /* lrows[] collects all locally owned rows to zero, in local numbering */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: must insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;    /* back to global numbering */
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
559 
560 #undef __FUNC__
561 #define __FUNC__ "MatMult_MPIAIJ"
562 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
563 {
564   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
565   int        ierr,nt;
566 
567   PetscFunctionBegin;
568   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
569   if (nt != a->n) {
570     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
571   }
572   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
573   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
574   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
575   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNC__
580 #define __FUNC__ "MatMultAdd_MPIAIJ"
581 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
584   int        ierr;
585 
586   PetscFunctionBegin;
587   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
588   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
589   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
590   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNC__
595 #define __FUNC__ "MatMultTrans_MPIAIJ"
596 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
597 {
598   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
599   int        ierr;
600 
601   PetscFunctionBegin;
602   /* do nondiagonal part */
603   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
604   /* send it on its way */
605   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
606   /* do local part */
607   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
608   /* receive remote parts: note this assumes the values are not actually */
609   /* inserted in yy until the next line, which is true for my implementation*/
610   /* but is not perhaps always true. */
611   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
612   PetscFunctionReturn(0);
613 }
614 
615 #undef __FUNC__
616 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
617 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
618 {
619   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
620   int        ierr;
621 
622   PetscFunctionBegin;
623   /* do nondiagonal part */
624   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
625   /* send it on its way */
626   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
627   /* do local part */
628   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
629   /* receive remote parts: note this assumes the values are not actually */
630   /* inserted in yy until the next line, which is true for my implementation*/
631   /* but is not perhaps always true. */
632   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637   This only works correctly for square matrices where the subblock A->A is the
638    diagonal block
639 */
640 #undef __FUNC__
641 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
642 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
643 {
644   int        ierr;
645   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
646 
647   PetscFunctionBegin;
648   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
649   if (a->rstart != a->cstart || a->rend != a->cend) {
650     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
651   }
652   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNC__
657 #define __FUNC__ "MatScale_MPIAIJ"
658 int MatScale_MPIAIJ(Scalar *aa,Mat A)
659 {
660   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
661   int        ierr;
662 
663   PetscFunctionBegin;
664   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
665   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
666   PetscFunctionReturn(0);
667 }
668 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
/*
   MatDestroy_MPIAIJ - Releases all storage owned by the parallel matrix
   once its reference count drops to zero: mappings, stash, both sequential
   blocks, colmap/garray, the scatter work vector and context, and finally
   the header itself.
*/
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  /* other references still exist: just decrement and return */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  /* NOTE(review): the TableDelete/VecDestroy/VecScatterDestroy return codes
     below are ignored, unlike the calls above -- consider CHKERRQ */
#if defined (USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
712 
713 #undef __FUNC__
714 #define __FUNC__ "MatView_MPIAIJ_Binary"
715 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
716 {
717   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
718   int         ierr;
719 
720   PetscFunctionBegin;
721   if (aij->size == 1) {
722     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
723   }
724   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNC__
729 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
730 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
731 {
732   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
733   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
734   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
735   int         size = aij->size;
736   FILE        *fd;
737   ViewerType  vtype;
738 
739   PetscFunctionBegin;
740   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
741   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
742     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
743     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
744       MatInfo info;
745       int     flg;
746       MPI_Comm_rank(mat->comm,&rank);
747       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
748       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
749       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
750       PetscSequentialPhaseBegin(mat->comm,1);
751       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
752          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
753       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
754          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
755       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
756       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
757       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
758       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
759       fflush(fd);
760       PetscSequentialPhaseEnd(mat->comm,1);
761       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
762       PetscFunctionReturn(0);
763     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
764       PetscFunctionReturn(0);
765     }
766   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
767     Draw       draw;
768     PetscTruth isnull;
769     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
770     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
771   }
772 
773   if (size == 1) {
774     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
775   } else {
776     /* assemble the entire matrix onto first processor. */
777     Mat         A;
778     Mat_SeqAIJ *Aloc;
779     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
780     Scalar      *a;
781 
782     if (!rank) {
783       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
784     } else {
785       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
786     }
787     PLogObjectParent(mat,A);
788 
789     /* copy over the A part */
790     Aloc = (Mat_SeqAIJ*) aij->A->data;
791     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
792     row = aij->rstart;
793     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
794     for ( i=0; i<m; i++ ) {
795       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
796       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
797     }
798     aj = Aloc->j;
799     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
800 
801     /* copy over the B part */
802     Aloc = (Mat_SeqAIJ*) aij->B->data;
803     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
804     row = aij->rstart;
805     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
806     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
807     for ( i=0; i<m; i++ ) {
808       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
809       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
810     }
811     PetscFree(ct);
812     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
813     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
814     /*
815        Everyone has to call to draw the matrix since the graphics waits are
816        synchronized across all processors that share the Draw object
817     */
818     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
819       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
820     }
821     ierr = MatDestroy(A); CHKERRQ(ierr);
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNC__
827 #define __FUNC__ "MatView_MPIAIJ"
828 int MatView_MPIAIJ(Mat mat,Viewer viewer)
829 {
830   int         ierr;
831   ViewerType  vtype;
832 
833   PetscFunctionBegin;
834   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
835   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
836       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
837     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
838   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
839     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
840   } else {
841     SETERRQ(1,1,"Viewer type not supported by PETSc object");
842   }
843   PetscFunctionReturn(0);
844 }
845 
846 /*
847     This has to provide several versions.
848 
849      2) a) use only local smoothing updating outer values only once.
850         b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
852 */
853 #undef __FUNC__
854 #define __FUNC__ "MatRelax_MPIAIJ"
855 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
856                            double fshift,int its,Vec xx)
857 {
858   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
859   Mat        AA = mat->A, BB = mat->B;
860   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
861   Scalar     *b,*x,*xs,*ls,d,*v,sum;
862   int        ierr,*idx, *diag;
863   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
864 
865   PetscFunctionBegin;
866   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
867   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
868   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
869   xs = x + shift; /* shift by one for index start of 1 */
870   ls = ls + shift;
871   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
872   diag = A->diag;
873   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
874     if (flag & SOR_ZERO_INITIAL_GUESS) {
875       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
876       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
877       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
878       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     }
881     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
882     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
883     while (its--) {
884       /* go down through the rows */
885       for ( i=0; i<m; i++ ) {
886         n    = A->i[i+1] - A->i[i];
887 	PLogFlops(4*n+3);
888         idx  = A->j + A->i[i] + shift;
889         v    = A->a + A->i[i] + shift;
890         sum  = b[i];
891         SPARSEDENSEMDOT(sum,xs,v,idx,n);
892         d    = fshift + A->a[diag[i]+shift];
893         n    = B->i[i+1] - B->i[i];
894         idx  = B->j + B->i[i] + shift;
895         v    = B->a + B->i[i] + shift;
896         SPARSEDENSEMDOT(sum,ls,v,idx,n);
897         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
898       }
899       /* come up through the rows */
900       for ( i=m-1; i>-1; i-- ) {
901         n    = A->i[i+1] - A->i[i];
902 	PLogFlops(4*n+3)
903         idx  = A->j + A->i[i] + shift;
904         v    = A->a + A->i[i] + shift;
905         sum  = b[i];
906         SPARSEDENSEMDOT(sum,xs,v,idx,n);
907         d    = fshift + A->a[diag[i]+shift];
908         n    = B->i[i+1] - B->i[i];
909         idx  = B->j + B->i[i] + shift;
910         v    = B->a + B->i[i] + shift;
911         SPARSEDENSEMDOT(sum,ls,v,idx,n);
912         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
913       }
914     }
915   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
916     if (flag & SOR_ZERO_INITIAL_GUESS) {
917       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
918       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
919       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
920       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
921       PetscFunctionReturn(0);
922     }
923     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
924     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
925     while (its--) {
926       for ( i=0; i<m; i++ ) {
927         n    = A->i[i+1] - A->i[i];
928 	PLogFlops(4*n+3);
929         idx  = A->j + A->i[i] + shift;
930         v    = A->a + A->i[i] + shift;
931         sum  = b[i];
932         SPARSEDENSEMDOT(sum,xs,v,idx,n);
933         d    = fshift + A->a[diag[i]+shift];
934         n    = B->i[i+1] - B->i[i];
935         idx  = B->j + B->i[i] + shift;
936         v    = B->a + B->i[i] + shift;
937         SPARSEDENSEMDOT(sum,ls,v,idx,n);
938         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
939       }
940     }
941   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
942     if (flag & SOR_ZERO_INITIAL_GUESS) {
943       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
944       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
945       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
946       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
947       PetscFunctionReturn(0);
948     }
949     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
950     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
951     while (its--) {
952       for ( i=m-1; i>-1; i-- ) {
953         n    = A->i[i+1] - A->i[i];
954 	PLogFlops(4*n+3);
955         idx  = A->j + A->i[i] + shift;
956         v    = A->a + A->i[i] + shift;
957         sum  = b[i];
958         SPARSEDENSEMDOT(sum,xs,v,idx,n);
959         d    = fshift + A->a[diag[i]+shift];
960         n    = B->i[i+1] - B->i[i];
961         idx  = B->j + B->i[i] + shift;
962         v    = B->a + B->i[i] + shift;
963         SPARSEDENSEMDOT(sum,ls,v,idx,n);
964         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
965       }
966     }
967   } else {
968     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
969   }
970   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
971   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
972   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 #undef __FUNC__
977 #define __FUNC__ "MatGetInfo_MPIAIJ"
978 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
979 {
980   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
981   Mat        A = mat->A, B = mat->B;
982   int        ierr;
983   double     isend[5], irecv[5];
984 
985   PetscFunctionBegin;
986   info->block_size     = 1.0;
987   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
988   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
989   isend[3] = info->memory;  isend[4] = info->mallocs;
990   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
991   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
992   isend[3] += info->memory;  isend[4] += info->mallocs;
993   if (flag == MAT_LOCAL) {
994     info->nz_used      = isend[0];
995     info->nz_allocated = isend[1];
996     info->nz_unneeded  = isend[2];
997     info->memory       = isend[3];
998     info->mallocs      = isend[4];
999   } else if (flag == MAT_GLOBAL_MAX) {
1000     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1001     info->nz_used      = irecv[0];
1002     info->nz_allocated = irecv[1];
1003     info->nz_unneeded  = irecv[2];
1004     info->memory       = irecv[3];
1005     info->mallocs      = irecv[4];
1006   } else if (flag == MAT_GLOBAL_SUM) {
1007     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1008     info->nz_used      = irecv[0];
1009     info->nz_allocated = irecv[1];
1010     info->nz_unneeded  = irecv[2];
1011     info->memory       = irecv[3];
1012     info->mallocs      = irecv[4];
1013   }
1014   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1015   info->fill_ratio_needed = 0;
1016   info->factor_mallocs    = 0;
1017   info->rows_global       = (double)mat->M;
1018   info->columns_global    = (double)mat->N;
1019   info->rows_local        = (double)mat->m;
1020   info->columns_local     = (double)mat->N;
1021 
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNC__
1026 #define __FUNC__ "MatSetOption_MPIAIJ"
1027 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1028 {
1029   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1030   int        ierr;
1031 
1032   PetscFunctionBegin;
1033   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1034       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1035       op == MAT_COLUMNS_UNSORTED ||
1036       op == MAT_COLUMNS_SORTED ||
1037       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1038       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1039         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1040         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1041   } else if (op == MAT_ROW_ORIENTED) {
1042     a->roworiented = 1;
1043     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1044     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1045   } else if (op == MAT_ROWS_SORTED ||
1046              op == MAT_ROWS_UNSORTED ||
1047              op == MAT_SYMMETRIC ||
1048              op == MAT_STRUCTURALLY_SYMMETRIC ||
1049              op == MAT_YES_NEW_DIAGONALS)
1050     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1051   else if (op == MAT_COLUMN_ORIENTED) {
1052     a->roworiented = 0;
1053     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1054     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1055   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1056     a->donotstash = 1;
1057   } else if (op == MAT_NO_NEW_DIAGONALS){
1058     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1059   } else {
1060     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNC__
1066 #define __FUNC__ "MatGetSize_MPIAIJ"
1067 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1068 {
1069   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1070 
1071   PetscFunctionBegin;
1072   if (m) *m = mat->M;
1073   if (n) *n = mat->N;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNC__
1078 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1079 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1080 {
1081   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1082 
1083   PetscFunctionBegin;
1084   if (m) *m = mat->m;
1085   if (n) *n = mat->n;
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNC__
1090 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1091 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1092 {
1093   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1094 
1095   PetscFunctionBegin;
1096   *m = mat->rstart; *n = mat->rend;
1097   PetscFunctionReturn(0);
1098 }
1099 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the parallel matrix
   with global column indices, merging the diagonal (A) and off-diagonal
   (B) blocks into a single column-sorted row.

   The merged values/indices live in the per-matrix scratch buffers
   mat->rowvalues/mat->rowindices; only one row may be "gotten" at a time
   (guarded by mat->getrowactive) and must be released with
   MatRestoreRow_MPIAIJ.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  /* lazily allocate the merge scratch space, sized for the widest local row */
  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the Scalar values and the int indices */
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* pass NULL sub-requests down to A and B when the caller did not ask
     for values and/or indices (B's columns are still needed to position
     values, hence pcB stays set when v is requested) */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* garray maps B's compressed local columns back to global columns */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries whose global column precedes the diagonal block */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        /* imark = number of B entries left of the diagonal block */
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested, so locate the split point now */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's columns are local; offset by cstart to make them global */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* release the sub-rows obtained from A and B */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1179 
1180 #undef __FUNC__
1181 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1182 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1183 {
1184   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1185 
1186   PetscFunctionBegin;
1187   if (aij->getrowactive == PETSC_FALSE) {
1188     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1189   }
1190   aij->getrowactive = PETSC_FALSE;
1191   PetscFunctionReturn(0);
1192 }
1193 
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes a norm of a parallel AIJ matrix.

   Supports NORM_FROBENIUS (sum of squares reduced with MPI_SUM),
   NORM_1 (max column sum, accumulated in a length-N array and reduced
   with MPI_SUM before taking the max) and NORM_INFINITY (max row sum,
   reduced with MPI_MAX).  On one process the work is delegated to the
   sequential block.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the diagonal block holds the whole matrix */
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over both local blocks */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp accumulates local column sums over all N global columns;
         this is non-scalable in N, like colmap above */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* A stores local column indices: offset by cstart to get global */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* B stores compressed indices: map through garray to get global */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      /* each local row is split across A and B; sum both pieces */
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1268 
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   Builds a new n x m parallel matrix B by inserting each local row of A
   as a column of B (MatSetValues with the row/column arguments swapped).
   If matout is PETSC_NULL the result replaces A "in place" by destroying
   A's internals and copying B's header over A's (keeping A's ops
   tables); this requires the matrix to be square.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* note the swapped local/global dimensions: B is n x m (N x M) */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert A's local column indices to global, in place */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row `row` of A as column `row` of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* undo the in-place index conversion */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* garray maps the off-diagonal block's compressed columns to global */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}
1346 
1347 #undef __FUNC__
1348 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1349 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1350 {
1351   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1352   Mat a = aij->A, b = aij->B;
1353   int ierr,s1,s2,s3;
1354 
1355   PetscFunctionBegin;
1356   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1357   if (rr) {
1358     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1359     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1360     /* Overlap communication with computation. */
1361     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1362   }
1363   if (ll) {
1364     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1365     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1366     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1367   }
1368   /* scale  the diagonal block */
1369   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1370 
1371   if (rr) {
1372     /* Do a scatter end and then right scale the off-diagonal block */
1373     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1374     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1375   }
1376 
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 
1381 #undef __FUNC__
1382 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1383 int MatPrintHelp_MPIAIJ(Mat A)
1384 {
1385   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1386   int        ierr;
1387 
1388   PetscFunctionBegin;
1389   if (!a->rank) {
1390     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNC__
1396 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1397 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1398 {
1399   PetscFunctionBegin;
1400   *bs = 1;
1401   PetscFunctionReturn(0);
1402 }
1403 #undef __FUNC__
1404 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1405 int MatSetUnfactored_MPIAIJ(Mat A)
1406 {
1407   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1408   int        ierr;
1409 
1410   PetscFunctionBegin;
1411   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNC__
1416 #define __FUNC__ "MatEqual_MPIAIJ"
1417 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1418 {
1419   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1420   Mat        a, b, c, d;
1421   PetscTruth flg;
1422   int        ierr;
1423 
1424   PetscFunctionBegin;
1425   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1426   a = matA->A; b = matA->B;
1427   c = matB->A; d = matB->B;
1428 
1429   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1430   if (flg == PETSC_TRUE) {
1431     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1432   }
1433   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNC__
1438 #define __FUNC__ "MatCopy_MPIAIJ"
1439 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1440 {
1441   int        ierr;
1442   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1443   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1444 
1445   PetscFunctionBegin;
1446   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1447     /* because of the column compression in the off-processor part of the matrix a->B,
1448        the number of columns in a->B and b->B may be different, hence we cannot call
1449        the MatCopy() directly on the two parts. If need be, we can provide a more
1450        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1451        then copying the submatrices */
1452     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1453   } else {
1454     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1455     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1456   }
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1461 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1462 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1463 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1464 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1465 
1466 /* -------------------------------------------------------------------*/
/*
   Dispatch table of MPIAIJ matrix operations.  The entries are
   positional (struct _MatOps initializer order); a 0 marks an operation
   this format does not implement.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};
1533 
1534 /* ----------------------------------------------------------------------------------------*/
1535 
1536 EXTERN_C_BEGIN
1537 #undef __FUNC__
1538 #define __FUNC__ "MatStoreValues_MPIAIJ"
1539 int MatStoreValues_MPIAIJ(Mat mat)
1540 {
1541   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1542   int        ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1546   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 EXTERN_C_END
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNC__
1553 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1554 int MatRetrieveValues_MPIAIJ(Mat mat)
1555 {
1556   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1557   int        ierr;
1558 
1559   PetscFunctionBegin;
1560   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1561   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 EXTERN_C_END
1565 
1566 #include "pc.h"
1567 EXTERN_C_BEGIN
1568 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1569 EXTERN_C_END
1570 
1571 #undef __FUNC__
1572 #define __FUNC__ "MatCreateMPIAIJ"
1573 /*@C
1574    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1575    (the default parallel PETSc format).  For good matrix assembly performance
1576    the user should preallocate the matrix storage by setting the parameters
1577    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1578    performance can be increased by more than a factor of 50.
1579 
1580    Collective on MPI_Comm
1581 
1582    Input Parameters:
1583 +  comm - MPI communicator
1584 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1585            This value should be the same as the local size used in creating the
1586            y vector for the matrix-vector product y = Ax.
1587 .  n - This value should be the same as the local size used in creating the
1588        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1589        calculated if N is given) For square matrices n is almost always m.
1590 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1591 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1592 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1593            (same value is used for all local rows)
1594 .  d_nnz - array containing the number of nonzeros in the various rows of the
1595            DIAGONAL portion of the local submatrix (possibly different for each row)
1596            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1597            The size of this array is equal to the number of local rows, i.e 'm'.
1598            You must leave room for the diagonal entry even if it is zero.
1599 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1600            submatrix (same value is used for all local rows).
1601 -  o_nnz - array containing the number of nonzeros in the various rows of the
1602            OFF-DIAGONAL portion of the local submatrix (possibly different for
1603            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1604            structure. The size of this array is equal to the number
1605            of local rows, i.e 'm'.
1606 
1607    Output Parameter:
1608 .  A - the matrix
1609 
1610    Notes:
1611    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1612    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1613    storage requirements for this matrix.
1614 
1615    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1616    processor than it must be used on all processors that share the object for
1617    that argument.
1618 
1619    The AIJ format (also called the Yale sparse matrix format or
1620    compressed row storage), is fully compatible with standard Fortran 77
1621    storage.  That is, the stored row and column indices can begin at
1622    either one (as in Fortran) or zero.  See the users manual for details.
1623 
1624    The user MUST specify either the local or global matrix dimensions
1625    (possibly both).
1626 
1627    The parallel matrix is partitioned such that the first m0 rows belong to
1628    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1629    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1630 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1637 
1638    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1639 
1640    By default, this format uses inodes (identical nodes) when possible.
1641    We search for consecutive rows with the same nonzero structure, thereby
1642    reusing matrix information to achieve increased efficiency.
1643 
1644    Options Database Keys:
1645 +  -mat_aij_no_inode  - Do not use inodes
1646 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqAIJ format on one processor)
1654 
1655 
1656    Example usage:
1657 
1658    Consider the following 8x8 matrix with 34 non-zero values, that is
1659    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1660    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1661    as follows:
1662 
1663 .vb
1664             1  2  0  |  0  3  0  |  0  4
1665     Proc0   0  5  6  |  7  0  0  |  8  0
1666             9  0 10  | 11  0  0  | 12  0
1667     -------------------------------------
1668            13  0 14  | 15 16 17  |  0  0
1669     Proc1   0 18  0  | 19 20 21  |  0  0
1670             0  0  0  | 22 23  0  | 24  0
1671     -------------------------------------
1672     Proc2  25 26 27  |  0  0 28  | 29  0
1673            30  0  0  | 31 32 33  |  0 34
1674 .ve
1675 
1676    This can be represented as a collection of submatrices as:
1677 
1678 .vb
1679       A B C
1680       D E F
1681       G H I
1682 .ve
1683 
1684    Where the submatrices A,B,C are owned by proc0, D,E,F are
1685    owned by proc1, G,H,I are owned by proc2.
1686 
1687    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1688    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1689    The 'M','N' parameters are 8,8, and have the same values on all procs.
1690 
1691    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1692    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1693    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
1697 
1698    When d_nz, o_nz parameters are specified, d_nz storage elements are
1699    allocated for every row of the local diagonal submatrix, and o_nz
1700    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1703    In this case, the values of d_nz,o_nz are:
1704 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
1708 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.
1713 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1716    In the above case the values for d_nnz,o_nnz are:
1717 .vb
1718      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1719      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1720      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1721 .ve
1722    Here the space allocated is sum of all the above values i.e 34, and
1723    hence pre-allocation is perfect.
1724 
1725    Level: intermediate
1726 
1727 .keywords: matrix, aij, compressed row, sparse, parallel
1728 
1729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1730 @*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  /* On one processor default to the cheaper SeqAIJ format, unless the user
     explicitly requested the parallel data structures via the options database */
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    /* sequentially there is no off-diagonal part, so o_nz/o_nnz are not needed */
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy    = MatDestroy_MPIAIJ;
  B->ops->view       = MatView_MPIAIJ;
  B->factor          = 0;
  B->assembled       = PETSC_FALSE;
  B->mapping         = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  MPI_Comm_rank(comm,&b->rank);

  /* per-row preallocation (d_nnz/o_nnz) requires a known local row count */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  /* fill in any PETSC_DECIDE local/global dimensions consistently across comm */
  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds both
     tables, with cowners aliasing the second half of the rowners buffer */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the gathered counts so rowners[i] is the first global row owned
     by process i; the loop starts at 2 because rowners[1] already equals
     process 0's row count */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same scheme for the column ownership table */
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* local storage is two sequential matrices: b->A is the diagonal (m x n)
     block, b->B the off-diagonal (m x N) block */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register dynamic methods that other parts of the library look up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}
1834 
1835 #undef __FUNC__
1836 #define __FUNC__ "MatDuplicate_MPIAIJ"
1837 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1838 {
1839   Mat        mat;
1840   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1841   int        ierr, len=0, flg;
1842 
1843   PetscFunctionBegin;
1844   *newmat       = 0;
1845   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1846   PLogObjectCreate(mat);
1847   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1848   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1849   mat->ops->destroy = MatDestroy_MPIAIJ;
1850   mat->ops->view    = MatView_MPIAIJ;
1851   mat->factor       = matin->factor;
1852   mat->assembled    = PETSC_TRUE;
1853   mat->insertmode   = NOT_SET_VALUES;
1854 
1855   a->m = mat->m   = oldmat->m;
1856   a->n = mat->n   = oldmat->n;
1857   a->M = mat->M   = oldmat->M;
1858   a->N = mat->N   = oldmat->N;
1859 
1860   a->rstart       = oldmat->rstart;
1861   a->rend         = oldmat->rend;
1862   a->cstart       = oldmat->cstart;
1863   a->cend         = oldmat->cend;
1864   a->size         = oldmat->size;
1865   a->rank         = oldmat->rank;
1866   a->donotstash   = oldmat->donotstash;
1867   a->roworiented  = oldmat->roworiented;
1868   a->rowindices   = 0;
1869   a->rowvalues    = 0;
1870   a->getrowactive = PETSC_FALSE;
1871 
1872   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1873   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1874   a->cowners = a->rowners + a->size + 2;
1875   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1876   ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr);
1877   if (oldmat->colmap) {
1878 #if defined (USE_CTABLE)
1879     ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
1880 #else
1881     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1882     PLogObjectMemory(mat,(a->N)*sizeof(int));
1883     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1884 #endif
1885   } else a->colmap = 0;
1886   if (oldmat->garray) {
1887     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1888     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1889     PLogObjectMemory(mat,len*sizeof(int));
1890     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1891   } else a->garray = 0;
1892 
1893   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1894   PLogObjectParent(mat,a->lvec);
1895   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1896   PLogObjectParent(mat,a->Mvctx);
1897   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1898   PLogObjectParent(mat,a->A);
1899   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
1900   PLogObjectParent(mat,a->B);
1901   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1902   if (flg) {
1903     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1904   }
1905   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1906   *newmat = mat;
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #include "sys.h"
1911 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   Loads an MPIAIJ matrix from a binary viewer.  Process 0 reads the header,
   row lengths, column indices and values, and ships each other process its
   contiguous block of rows; rows are split as evenly as possible, with the
   first M%size processes getting one extra row.
   NOTE(review): the 'type' argument is not referenced in this routine --
   presumably dispatch on it happened in the caller; confirm before relying
   on it here.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only process 0 touches the file; it validates the header before the
     dimensions are broadcast to everyone else */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix-sum so rowners[i] is the first global row owned by process i;
     starts at 2 because rowners[1] already equals process 0's row count */
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens aliases the second
     half of the same allocation as ourlens */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array: entry i is the first row
       of process i, exactly the scatter offset needed */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices; verify it is exactly the size
       implied by the row lengths */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily reduce ourlens to diagonal-only counts so
     the two arrays serve as the d_nnz/o_nnz preallocation arguments */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  /* restore the full per-row lengths for the value-insertion loops below */
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  /* offlens lives inside the ourlens allocation, so one free covers both */
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2090 
#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    csize is the number of columns of the result to be owned by this process
  (PETSC_DECIDE splits them as evenly as possible); call selects between
  creating a fresh matrix (MAT_INITIAL_MATRIX) and refilling one produced by
  an earlier call (MAT_REUSE_MATRIX).
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential submatrix was stashed on the output matrix under the
       name "SubMatrix" by the MAT_INITIAL_MATRIX path below */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* only the array wrapper is freed; the matrix itself is kept in Mreuse */
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts yields this process's
       column range [rstart,rend) of the new matrix */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; olens aliases the tail of dlens */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* walk the sequential submatrix row by row, inserting each row's column
     indices and values into the parallel matrix */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; the dereference hands
     ownership of Mreuse to the composed reference on M */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2208