xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision be5d1d56a128fdbca06f8d9818f1d611ccde2ba2)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order N integer array) but is fast to access.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 int CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
18   int        n = aij->B->n,i,ierr;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
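/*
  A sketch (illustrative only, not compiled) of how the colmap created above is
  later used.  Both variants store "local index + 1", so a value of 0 from the
  array, or a failed table lookup, means the global column is not present in
  the off-diagonal part.  Assuming a global column index gcol and a local
  result lcol, the lookup mirrors MatSetValues_MPIAIJ() below:

#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
#else
      lcol = aij->colmap[gcol] - 1;
#endif
      if (lcol < 0)  ... the column gcol does not (yet) appear in aij->B ...
*/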
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
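/*
  Both macros above implement the same sorted-row insertion; only the arrays
  (A part vs B part) differ.  A minimal standalone sketch of the logic, assuming
  a single row held in rp[] (column indices) and ap[] (values) with nrow current
  entries and room for one more:

      low = 0; high = nrow;
      while (high-low > 5) {                        binary search phase
        t = (low+high)/2;
        if (rp[t] > col) high = t; else low = t;
      }
      for (_i=low; _i<high; _i++) {                 short linear scan
        if (rp[_i] == col) { ap[_i] = (addv == ADD_VALUES) ? ap[_i]+value : value; goto done; }
        if (rp[_i] > col) break;
      }
      for (ii=nrow-1; ii>=_i; ii--) {               shift later entries up
        rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii];
      }
      rp[_i] = col; ap[_i] = value; nrow++;
      done: ;

  When the row has no free slot left the macros instead reallocate the entire
  matrix storage, giving the current row CHUNKSIZE extra slots.
*/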
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
191   int          cstart = aij->cstart,cend = aij->cend,row,col;
192   PetscTruth   roworiented = aij->roworiented;
193 
194   /* Some variables required by the macros below */
195   Mat          A = aij->A;
196   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
197   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
198   PetscScalar  *aa = a->a;
199   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
200   Mat          B = aij->B;
201   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
202   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
203   PetscScalar  *ba = b->a;
204 
205   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
206   int          nonew = a->nonew,shift=0;
207   PetscScalar  *ap;
208 
209   PetscFunctionBegin;
210   for (i=0; i<m; i++) {
211     if (im[i] < 0) continue;
212 #if defined(PETSC_USE_BOPT_g)
213     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
214 #endif
215     if (im[i] >= rstart && im[i] < rend) {
216       row = im[i] - rstart;
217       for (j=0; j<n; j++) {
218         if (in[j] >= cstart && in[j] < cend){
219           col = in[j] - cstart;
220           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
221           if (ignorezeroentries && value == 0.0) continue;
222           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
223           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
224         } else if (in[j] < 0) continue;
225 #if defined(PETSC_USE_BOPT_g)
226         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
227 #endif
228         else {
229           if (mat->was_assembled) {
230             if (!aij->colmap) {
231               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
232             }
233 #if defined (PETSC_USE_CTABLE)
234             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
235 	    col--;
236 #else
237             col = aij->colmap[in[j]] - 1;
238 #endif
239             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
240               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
241               col =  in[j];
242               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
243               B = aij->B;
244               b = (Mat_SeqAIJ*)B->data;
245               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
246               ba = b->a;
247             }
248           } else col = in[j];
249           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
250           if (ignorezeroentries && value == 0.0) continue;
251           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     } else {
256       if (!aij->donotstash) {
257         if (roworiented) {
258           if (ignorezeroentries && v[i*n] == 0.0) continue;
259           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
260         } else {
261           if (ignorezeroentries && v[i] == 0.0) continue;
262           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
263         }
264       }
265     }
266   }
267   PetscFunctionReturn(0);
268 }
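/*
  Example usage (a sketch, assuming mat is a previously created MPIAIJ matrix
  with at least 2 global rows and columns): values for rows owned by this
  process go directly into the local A (diagonal) or B (off-diagonal) blocks
  via the macros above; values for rows owned elsewhere are stashed and
  communicated during assembly.

      int         row = 0,cols[2] = {0,1};
      PetscScalar vals[2] = {2.0,-1.0};
      ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/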
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatGetValues_MPIAIJ"
272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
275   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
276   int        cstart = aij->cstart,cend = aij->cend,row,col;
277 
278   PetscFunctionBegin;
279   for (i=0; i<m; i++) {
280     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
281     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
282     if (idxm[i] >= rstart && idxm[i] < rend) {
283       row = idxm[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
286         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
287         if (idxn[j] >= cstart && idxn[j] < cend){
288           col = idxn[j] - cstart;
289           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
290         } else {
291           if (!aij->colmap) {
292             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
293           }
294 #if defined (PETSC_USE_CTABLE)
295           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
296           col --;
297 #else
298           col = aij->colmap[idxn[j]] - 1;
299 #endif
300           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301           else {
302             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
303           }
304         }
305       }
306     } else {
307       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
308     }
309   }
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316 {
317   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
318   int         ierr,nstash,reallocs;
319   InsertMode  addv;
320 
321   PetscFunctionBegin;
322   if (aij->donotstash) {
323     PetscFunctionReturn(0);
324   }
325 
326   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
327   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
328   if (addv == (ADD_VALUES|INSERT_VALUES)) {
329     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
330   }
331   mat->insertmode = addv; /* in case this processor had no cache */
332 
333   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
334   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
335   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
336   PetscFunctionReturn(0);
337 }
338 
339 
340 #undef __FUNCT__
341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343 {
344   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
345   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
346   int         i,j,rstart,ncols,n,ierr,flg;
347   int         *row,*col,other_disassembled;
348   PetscScalar *val;
349   InsertMode  addv = mat->insertmode;
350 
351   PetscFunctionBegin;
352   if (!aij->donotstash) {
353     while (1) {
354       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
355       if (!flg) break;
356 
357       for (i=0; i<n;) {
358         /* Now identify the consecutive vals belonging to the same row */
359         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
360         if (j < n) ncols = j-i;
361         else       ncols = n-i;
362         /* Now assemble all these values with a single function call */
363         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
364         i = j;
365       }
366     }
367     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
368   }
369 
370   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
371   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
372 
373   /* determine if any processor has disassembled; if so we must
374      also disassemble ourselves, in order that we may reassemble. */
375   /*
376      if the nonzero structure of submatrix B cannot change then we know that
377      no processor disassembled, thus we can skip this step
378   */
379   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
380     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
381     if (mat->was_assembled && !other_disassembled) {
382       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
383     }
384   }
385 
386   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
387     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
388   }
389   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
390   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
391 
392   if (aij->rowvalues) {
393     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
394     aij->rowvalues = 0;
395   }
396 
397   /* used by MatAXPY() */
398   a->xtoy = 0; b->xtoy = 0;
399   a->XtoY = 0; b->XtoY = 0;
400 
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
422   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
423   int            *nprocs,j,idx,nsends,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431   PetscTruth     found;
432 
433   PetscFunctionBegin;
434   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
435   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
436 
437   /*  first count number of contributors to each processor */
438   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
439   ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
440   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
441   for (i=0; i<N; i++) {
442     idx = rows[i];
443     found = PETSC_FALSE;
444     for (j=0; j<size; j++) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
450   }
451   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
452 
453   /* inform other processors of number of messages and max length*/
454   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
455 
456   /* post receives:   */
457   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
458   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
459   for (i=0; i<nrecvs; i++) {
460     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
461   }
462 
463   /* do sends:
464       1) starts[i] gives the starting index in svalues for the values going to
465          the ith processor
466   */
467   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
468   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
469   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
470   starts[0] = 0;
471   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
472   for (i=0; i<N; i++) {
473     svalues[starts[owner[i]]++] = rows[i];
474   }
475   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
476 
477   starts[0] = 0;
478   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
479   count = 0;
480   for (i=0; i<size; i++) {
481     if (nprocs[2*i+1]) {
482       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
483     }
484   }
485   ierr = PetscFree(starts);CHKERRQ(ierr);
486 
487   base = owners[rank];
488 
489   /*  wait on receives */
490   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
491   source = lens + nrecvs;
492   count  = nrecvs; slen = 0;
493   while (count) {
494     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
495     /* unpack receives into our local space */
496     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
497     source[imdex]  = recv_status.MPI_SOURCE;
498     lens[imdex]    = n;
499     slen          += n;
500     count--;
501   }
502   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
503 
504   /* move the data into the send scatter */
505   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
506   count = 0;
507   for (i=0; i<nrecvs; i++) {
508     values = rvalues + i*nmax;
509     for (j=0; j<lens[i]; j++) {
510       lrows[count++] = values[j] - base;
511     }
512   }
513   ierr = PetscFree(rvalues);CHKERRQ(ierr);
514   ierr = PetscFree(lens);CHKERRQ(ierr);
515   ierr = PetscFree(owner);CHKERRQ(ierr);
516   ierr = PetscFree(nprocs);CHKERRQ(ierr);
517 
518   /* actually zap the local rows */
519   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
520   PetscLogObjectParent(A,istmp);
521 
522   /*
523         Zero the required rows. If the "diagonal block" of the matrix
524      is square and the user wishes to set the diagonal we use separate
525      code so that MatSetValues() is not called for each diagonal entry,
526      allocating new memory, thus causing lots of mallocs and slowing things down.
527 
528        Contributed by: Matthew Knepley
529   */
530   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
531   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
532   if (diag && (l->A->M == l->A->N)) {
533     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
534   } else if (diag) {
535     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
536     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
537       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
539     }
540     for (i = 0; i < slen; i++) {
541       row  = lrows[i] + rstart;
542       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
543     }
544     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546   } else {
547     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
548   }
549   ierr = ISDestroy(istmp);CHKERRQ(ierr);
550   ierr = PetscFree(lrows);CHKERRQ(ierr);
551 
552   /* wait on sends */
553   if (nsends) {
554     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
555     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
556     ierr = PetscFree(send_status);CHKERRQ(ierr);
557   }
558   ierr = PetscFree(send_waits);CHKERRQ(ierr);
559   ierr = PetscFree(svalues);CHKERRQ(ierr);
560 
561   PetscFunctionReturn(0);
562 }
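/*
  Example usage (a sketch, assuming mat is an assembled MPIAIJ matrix with at
  least 8 global rows): zero two global rows and place 1.0 on their diagonal.
  The rows listed may be owned by any process; the communication above routes
  each index to its owner.

      IS          is;
      int         rows[2] = {5,7};
      PetscScalar one = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(mat,is,&one);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/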
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatMult_MPIAIJ"
566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
567 {
568   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
569   int        ierr,nt;
570 
571   PetscFunctionBegin;
572   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
573   if (nt != A->n) {
574     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
575   }
576   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
577   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
578   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
579   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
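/*
  The multiply above computes, on each process,

      y_local = A * x_local + B * x_ghost

  where A holds the columns owned by this process (the "diagonal" block) and
  B holds the off-process columns, whose x entries are gathered into a->lvec
  by the scatter.  The scatter is started before the local product so that
  communication overlaps computation.
*/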
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatMultAdd_MPIAIJ"
585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
586 {
587   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
588   int        ierr;
589 
590   PetscFunctionBegin;
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   /* do nondiagonal part */
607   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
608   /* send it on its way */
609   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
610   /* do local part */
611   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
612   /* receive remote parts: note this assumes the values are not actually */
613   /* inserted into yy until the next line, which is true for my implementation */
614   /* but is perhaps not always true. */
615   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
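/*
  The transpose multiply reverses the communication pattern of MatMult():
  the off-diagonal contribution B^T * x is computed into the ghost vector
  a->lvec first and then scattered back (with ADD_VALUES) to the processes
  that own those columns, while the local product A^T * x is formed in
  between to overlap communication and computation.
*/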
618 
619 EXTERN_C_BEGIN
620 #undef __FUNCT__
621 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
623 {
624   MPI_Comm comm;
625   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
626   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
627   IS         Me,Notme;
628   int        M,N,first,last,*notme,ntids,i, ierr;
629 
630   PetscFunctionBegin;
631 
632   /* Easy test: symmetric diagonal block */
633   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
634   ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr);
635   if (!*f) PetscFunctionReturn(0);
636   ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
637   ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
638   if (ntids==1) PetscFunctionReturn(0);
639 
640   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
641   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
642   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
643   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
644   for (i=0; i<first; i++) notme[i] = i;
645   for (i=last; i<M; i++) notme[i-last+first] = i;
646   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
647   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
648   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
649   Aoff = Aoffs[0];
650   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
651   Boff = Boffs[0];
652   ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr);
653   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
654   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
655   ierr = ISDestroy(Me); CHKERRQ(ierr);
656   ierr = ISDestroy(Notme); CHKERRQ(ierr);
657 
658   PetscFunctionReturn(0);
659 }
660 EXTERN_C_END
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
664 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
665 {
666   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
667   int        ierr;
668 
669   PetscFunctionBegin;
670   /* do nondiagonal part */
671   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
672   /* send it on its way */
673   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
674   /* do local part */
675   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
676   /* receive remote parts: note this assumes the values are not actually */
677   /* inserted into zz until the next line, which is true for my implementation */
678   /* but is perhaps not always true. */
679   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
680   PetscFunctionReturn(0);
681 }
682 
683 /*
684   This only works correctly for square matrices where the subblock A->A is the
685    diagonal block
686 */
687 #undef __FUNCT__
688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
690 {
691   int        ierr;
692   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
693 
694   PetscFunctionBegin;
695   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
696   if (a->rstart != a->cstart || a->rend != a->cend) {
697     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
698   }
699   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatScale_MPIAIJ"
705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
706 {
707   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
708   int        ierr;
709 
710   PetscFunctionBegin;
711   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
712   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatDestroy_MPIAIJ"
718 int MatDestroy_MPIAIJ(Mat mat)
719 {
720   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
721   int        ierr;
722 
723   PetscFunctionBegin;
724 #if defined(PETSC_USE_LOG)
725   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
726 #endif
727   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
728   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
729   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
730   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
731 #if defined (PETSC_USE_CTABLE)
732   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
733 #else
734   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
735 #endif
736   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
737   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
738   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
739   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
740   ierr = PetscFree(aij);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatView_MPIAIJ_Binary"
746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
747 {
748   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
749   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
750   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
751   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
752   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
753   PetscScalar       *column_values;
754 
755   PetscFunctionBegin;
756   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
757   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
758   nz   = A->nz + B->nz;
759   if (rank == 0) {
760     header[0] = MAT_FILE_COOKIE;
761     header[1] = mat->M;
762     header[2] = mat->N;
763     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
764     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
765     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
766     /* get largest number of rows any processor has */
767     rlen = mat->m;
768     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
769     for (i=1; i<size; i++) {
770       rlen = PetscMax(rlen,range[i+1] - range[i]);
771     }
772   } else {
773     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
774     rlen = mat->m;
775   }
776 
777   /* load up the local row counts */
778   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
779   for (i=0; i<mat->m; i++) {
780     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781   }
782 
783   /* store the row lengths to the file */
784   if (rank == 0) {
785     MPI_Status status;
786     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
787     for (i=1; i<size; i++) {
788       rlen = range[i+1] - range[i];
789       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
790       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
791     }
792   } else {
793     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
794   }
795   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
796 
797   /* load up the local column indices */
798   nzmax = nz; /* 0th processor needs as much space as the largest any processor needs */
799   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
800   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
801   cnt  = 0;
802   for (i=0; i<mat->m; i++) {
803     for (j=B->i[i]; j<B->i[i+1]; j++) {
804       if ( (col = garray[B->j[j]]) > cstart) break;
805       column_indices[cnt++] = col;
806     }
807     for (k=A->i[i]; k<A->i[i+1]; k++) {
808       column_indices[cnt++] = A->j[k] + cstart;
809     }
810     for (; j<B->i[i+1]; j++) {
811       column_indices[cnt++] = garray[B->j[j]];
812     }
813   }
814   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
815 
816   /* store the column indices to the file */
817   if (rank == 0) {
818     MPI_Status status;
819     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
820     for (i=1; i<size; i++) {
821       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
823       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
824       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
825     }
826   } else {
827     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
828     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(column_indices);CHKERRQ(ierr);
831 
832   /* load up the local column values */
833   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
834   cnt  = 0;
835   for (i=0; i<mat->m; i++) {
836     for (j=B->i[i]; j<B->i[i+1]; j++) {
837       if ( garray[B->j[j]] > cstart) break;
838       column_values[cnt++] = B->a[j];
839     }
840     for (k=A->i[i]; k<A->i[i+1]; k++) {
841       column_values[cnt++] = A->a[k];
842     }
843     for (; j<B->i[i+1]; j++) {
844       column_values[cnt++] = B->a[j];
845     }
846   }
847   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
848 
849   /* store the column values to the file */
850   if (rank == 0) {
851     MPI_Status status;
852     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
853     for (i=1; i<size; i++) {
854       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
856       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
858     }
859   } else {
860     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
861     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(column_values);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
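/*
  Note on the ordering used above: for each local row the column indices (and
  the matching values) are written in increasing global order by interleaving
  the three pieces

      [ B columns with global index < cstart | A columns shifted by cstart | remaining B columns ]

  which is valid because the rows of both A and B are stored with their
  columns sorted.
*/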
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
872   int               ierr,rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,isascii,flg,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (isascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo info;
885       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
886       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
887       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
888       if (flg) {
889         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
890 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
891       } else {
892         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
893 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
894       }
895       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
896       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
897       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
899       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
900       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
901       PetscFunctionReturn(0);
902     } else if (format == PETSC_VIEWER_ASCII_INFO) {
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
905       PetscFunctionReturn(0);
906     }
907   } else if (isbinary) {
908     if (size == 1) {
909       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
910       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
911     } else {
912       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
913     }
914     PetscFunctionReturn(0);
915   } else if (isdraw) {
916     PetscDraw  draw;
917     PetscTruth isnull;
918     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
919     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
920   }
921 
922   if (size == 1) {
923     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
924     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
925   } else {
926     /* assemble the entire matrix onto first processor. */
927     Mat         A;
928     Mat_SeqAIJ *Aloc;
929     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
930     PetscScalar *a;
931 
932     if (!rank) {
933       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
934     } else {
935       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
936     }
937     PetscLogObjectParent(mat,A);
938 
939     /* copy over the A part */
940     Aloc = (Mat_SeqAIJ*)aij->A->data;
941     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
942     row = aij->rstart;
943     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
944     for (i=0; i<m; i++) {
945       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
946       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
947     }
948     aj = Aloc->j;
949     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
950 
951     /* copy over the B part */
952     Aloc = (Mat_SeqAIJ*)aij->B->data;
953     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row  = aij->rstart;
955     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
956     ct   = cols;
957     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
961     }
962     ierr = PetscFree(ct);CHKERRQ(ierr);
963     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
964     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965     /*
966        Everyone has to call to draw the matrix since the graphics waits are
967        synchronized across all processors that share the PetscDraw object
968     */
969     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
970     if (!rank) {
971       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
972       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
973     }
974     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
975     ierr = MatDestroy(A);CHKERRQ(ierr);
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatView_MPIAIJ"
982 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
983 {
984   int        ierr;
985   PetscTruth isascii,isdraw,issocket,isbinary;
986 
987   PetscFunctionBegin;
988   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
989   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
990   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
991   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
992   if (isascii || isdraw || isbinary || issocket) {
993     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
994   } else {
995     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
996   }
997   PetscFunctionReturn(0);
998 }
999 
1000 
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatRelax_MPIAIJ"
1004 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1005 {
1006   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1007   int          ierr;
1008   Vec          bb1;
1009   PetscScalar  mone=-1.0;
1010 
1011   PetscFunctionBegin;
1012   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1013 
1014   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1015 
1016   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1017     if (flag & SOR_ZERO_INITIAL_GUESS) {
1018       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1019       its--;
1020     }
1021 
1022     while (its--) {
1023       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1024       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1025 
1026       /* update rhs: bb1 = bb - B*x */
1027       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1028       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1029 
1030       /* local sweep */
1031       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1032       CHKERRQ(ierr);
1033     }
1034   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1035     if (flag & SOR_ZERO_INITIAL_GUESS) {
1036       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1037       its--;
1038     }
1039     while (its--) {
1040       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1041       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1042 
1043       /* update rhs: bb1 = bb - B*x */
1044       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1045       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1046 
1047       /* local sweep */
1048       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1049       CHKERRQ(ierr);
1050     }
1051   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1052     if (flag & SOR_ZERO_INITIAL_GUESS) {
1053       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1054       its--;
1055     }
1056     while (its--) {
1057       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1058       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1059 
1060       /* update rhs: bb1 = bb - B*x */
1061       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1062       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1063 
1064       /* local sweep */
1065       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1066       CHKERRQ(ierr);
1067     }
1068   } else {
1069     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1070   }
1071 
1072   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
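/*
  Each outer iteration above is a block Jacobi step between processes: the
  current ghost values of xx are gathered into mat->lvec, the right-hand side
  is updated as bb1 = bb - B*x_ghost (the VecScale by -1 supplies the minus
  sign), and then lits sweeps of local SOR are applied to the diagonal block
  A with bb1.
*/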
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1078 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1079 {
1080   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1081   Mat        A = mat->A,B = mat->B;
1082   int        ierr;
1083   PetscReal  isend[5],irecv[5];
1084 
1085   PetscFunctionBegin;
1086   info->block_size     = 1.0;
1087   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1088   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1089   isend[3] = info->memory;  isend[4] = info->mallocs;
1090   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1091   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1092   isend[3] += info->memory;  isend[4] += info->mallocs;
1093   if (flag == MAT_LOCAL) {
1094     info->nz_used      = isend[0];
1095     info->nz_allocated = isend[1];
1096     info->nz_unneeded  = isend[2];
1097     info->memory       = isend[3];
1098     info->mallocs      = isend[4];
1099   } else if (flag == MAT_GLOBAL_MAX) {
1100     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1101     info->nz_used      = irecv[0];
1102     info->nz_allocated = irecv[1];
1103     info->nz_unneeded  = irecv[2];
1104     info->memory       = irecv[3];
1105     info->mallocs      = irecv[4];
1106   } else if (flag == MAT_GLOBAL_SUM) {
1107     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1108     info->nz_used      = irecv[0];
1109     info->nz_allocated = irecv[1];
1110     info->nz_unneeded  = irecv[2];
1111     info->memory       = irecv[3];
1112     info->mallocs      = irecv[4];
1113   }
1114   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1115   info->fill_ratio_needed = 0;
1116   info->factor_mallocs    = 0;
1117   info->rows_global       = (double)matin->M;
1118   info->columns_global    = (double)matin->N;
1119   info->rows_local        = (double)matin->m;
1120   info->columns_local     = (double)matin->N;
1121 
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatSetOption_MPIAIJ"
1127 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1128 {
1129   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1130   int        ierr;
1131 
1132   PetscFunctionBegin;
1133   switch (op) {
1134   case MAT_NO_NEW_NONZERO_LOCATIONS:
1135   case MAT_YES_NEW_NONZERO_LOCATIONS:
1136   case MAT_COLUMNS_UNSORTED:
1137   case MAT_COLUMNS_SORTED:
1138   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1139   case MAT_KEEP_ZEROED_ROWS:
1140   case MAT_NEW_NONZERO_LOCATION_ERR:
1141   case MAT_USE_INODES:
1142   case MAT_DO_NOT_USE_INODES:
1143   case MAT_IGNORE_ZERO_ENTRIES:
1144     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1145     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1146     break;
1147   case MAT_ROW_ORIENTED:
1148     a->roworiented = PETSC_TRUE;
1149     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1150     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1151     break;
1152   case MAT_ROWS_SORTED:
1153   case MAT_ROWS_UNSORTED:
1154   case MAT_YES_NEW_DIAGONALS:
1155     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1156     break;
1157   case MAT_COLUMN_ORIENTED:
1158     a->roworiented = PETSC_FALSE;
1159     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1160     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1161     break;
1162   case MAT_IGNORE_OFF_PROC_ENTRIES:
1163     a->donotstash = PETSC_TRUE;
1164     break;
1165   case MAT_NO_NEW_DIAGONALS:
1166     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1167   case MAT_SYMMETRIC:
1168   case MAT_STRUCTURALLY_SYMMETRIC:
1169   case MAT_NOT_SYMMETRIC:
1170   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1171   case MAT_HERMITIAN:
1172   case MAT_NOT_HERMITIAN:
1173   case MAT_SYMMETRY_ETERNAL:
1174   case MAT_NOT_SYMMETRY_ETERNAL:
1175     break;
1176   default:
1177     SETERRQ(PETSC_ERR_SUP,"unknown option");
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatGetRow_MPIAIJ"
1184 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1185 {
1186   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1187   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1188   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1189   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1190   int          *cmap,*idx_p;
1191 
1192   PetscFunctionBegin;
1193   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1194   mat->getrowactive = PETSC_TRUE;
1195 
1196   if (!mat->rowvalues && (idx || v)) {
1197     /*
1198         allocate enough space to hold information from the longest row.
1199     */
1200     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1201     int     max = 1,tmp;
1202     for (i=0; i<matin->m; i++) {
1203       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1204       if (max < tmp) { max = tmp; }
1205     }
1206     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1207     mat->rowindices = (int*)(mat->rowvalues + max);
1208   }
1209 
1210   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1211   lrow = row - rstart;
1212 
1213   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1214   if (!v)   {pvA = 0; pvB = 0;}
1215   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1216   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1217   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1218   nztot = nzA + nzB;
1219 
1220   cmap  = mat->garray;
1221   if (v  || idx) {
1222     if (nztot) {
1223       /* Sort by increasing column numbers, assuming A and B already sorted */
1224       int imark = -1;
1225       if (v) {
1226         *v = v_p = mat->rowvalues;
1227         for (i=0; i<nzB; i++) {
1228           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1229           else break;
1230         }
1231         imark = i;
1232         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1233         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1234       }
1235       if (idx) {
1236         *idx = idx_p = mat->rowindices;
1237         if (imark > -1) {
1238           for (i=0; i<imark; i++) {
1239             idx_p[i] = cmap[cworkB[i]];
1240           }
1241         } else {
1242           for (i=0; i<nzB; i++) {
1243             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1244             else break;
1245           }
1246           imark = i;
1247         }
1248         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1249         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1250       }
1251     } else {
1252       if (idx) *idx = 0;
1253       if (v)   *v   = 0;
1254     }
1255   }
1256   *nz = nztot;
1257   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1258   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
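/*
  Example usage (a sketch): examine one locally owned row of an assembled
  MPIAIJ matrix mat.  The column indices returned are global and are merged
  in increasing order from the A and B parts.

      int         rstart,rend,row,nz,*cols;
      PetscScalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      row  = rstart;
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ... use cols[0..nz-1] and vals[0..nz-1] ...
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/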
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1264 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1265 {
1266   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1267 
1268   PetscFunctionBegin;
1269   if (aij->getrowactive == PETSC_FALSE) {
1270     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1271   }
1272   aij->getrowactive = PETSC_FALSE;
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatNorm_MPIAIJ"
1278 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1279 {
1280   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1281   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1282   int          ierr,i,j,cstart = aij->cstart;
1283   PetscReal    sum = 0.0;
1284   PetscScalar  *v;
1285 
1286   PetscFunctionBegin;
1287   if (aij->size == 1) {
1288     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1289   } else {
1290     if (type == NORM_FROBENIUS) {
1291       v = amat->a;
1292       for (i=0; i<amat->nz; i++) {
1293 #if defined(PETSC_USE_COMPLEX)
1294         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1295 #else
1296         sum += (*v)*(*v); v++;
1297 #endif
1298       }
1299       v = bmat->a;
1300       for (i=0; i<bmat->nz; i++) {
1301 #if defined(PETSC_USE_COMPLEX)
1302         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1303 #else
1304         sum += (*v)*(*v); v++;
1305 #endif
1306       }
1307       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1308       *norm = sqrt(*norm);
1309     } else if (type == NORM_1) { /* max column norm */
1310       PetscReal *tmp,*tmp2;
1311       int    *jj,*garray = aij->garray;
1312       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1313       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1314       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1315       *norm = 0.0;
1316       v = amat->a; jj = amat->j;
1317       for (j=0; j<amat->nz; j++) {
1318         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1319       }
1320       v = bmat->a; jj = bmat->j;
1321       for (j=0; j<bmat->nz; j++) {
1322         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1323       }
1324       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1325       for (j=0; j<mat->N; j++) {
1326         if (tmp2[j] > *norm) *norm = tmp2[j];
1327       }
1328       ierr = PetscFree(tmp);CHKERRQ(ierr);
1329       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1330     } else if (type == NORM_INFINITY) { /* max row norm */
1331       PetscReal ntemp = 0.0;
1332       for (j=0; j<aij->A->m; j++) {
1333         v = amat->a + amat->i[j];
1334         sum = 0.0;
1335         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1336           sum += PetscAbsScalar(*v); v++;
1337         }
1338         v = bmat->a + bmat->i[j];
1339         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1340           sum += PetscAbsScalar(*v); v++;
1341         }
1342         if (sum > ntemp) ntemp = sum;
1343       }
1344       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1345     } else {
1346       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1347     }
1348   }
1349   PetscFunctionReturn(0);
1350 }
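/*
  The norms computed above are, with a_ij denoting the global matrix entries,

      NORM_FROBENIUS :  sqrt( sum_ij |a_ij|^2 )
      NORM_1         :  max_j sum_i |a_ij|    (largest column sum)
      NORM_INFINITY  :  max_i sum_j |a_ij|    (largest row sum)

  each assembled from the local A and B parts and reduced across all
  processes with MPI_Allreduce().
*/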
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatTranspose_MPIAIJ"
1354 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1355 {
1356   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1357   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1358   int          ierr;
1359   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1360   Mat          B;
1361   PetscScalar  *array;
1362 
1363   PetscFunctionBegin;
1364   if (!matout && M != N) {
1365     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1366   }
1367 
1368   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1369 
1370   /* copy over the A part */
1371   Aloc = (Mat_SeqAIJ*)a->A->data;
1372   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1373   row = a->rstart;
1374   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1375   for (i=0; i<m; i++) {
1376     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1377     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1378   }
1379   aj = Aloc->j;
1380   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1381 
1382   /* copy over the B part */
1383   Aloc = (Mat_SeqAIJ*)a->B->data;
1384   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1385   row  = a->rstart;
1386   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1387   ct   = cols;
1388   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1389   for (i=0; i<m; i++) {
1390     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1391     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1392   }
1393   ierr = PetscFree(ct);CHKERRQ(ierr);
1394   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1396   if (matout) {
1397     *matout = B;
1398   } else {
1399     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1406 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1407 {
1408   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1409   Mat        a = aij->A,b = aij->B;
1410   int        ierr,s1,s2,s3;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1414   if (rr) {
1415     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1416     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1417     /* Overlap communication with computation. */
1418     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1419   }
1420   if (ll) {
1421     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1422     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1423     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1424   }
1425   /* scale  the diagonal block */
1426   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1427 
1428   if (rr) {
1429     /* Do a scatter end and then right scale the off-diagonal block */
1430     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1431     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1432   }
1433 
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1440 int MatPrintHelp_MPIAIJ(Mat A)
1441 {
1442   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1443   int        ierr;
1444 
1445   PetscFunctionBegin;
1446   if (!a->rank) {
1447     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1454 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1455 {
1456   PetscFunctionBegin;
1457   *bs = 1;
1458   PetscFunctionReturn(0);
1459 }
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1462 int MatSetUnfactored_MPIAIJ(Mat A)
1463 {
1464   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1465   int        ierr;
1466 
1467   PetscFunctionBegin;
1468   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatEqual_MPIAIJ"
1474 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1475 {
1476   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1477   Mat        a,b,c,d;
1478   PetscTruth flg;
1479   int        ierr;
1480 
1481   PetscFunctionBegin;
1482   a = matA->A; b = matA->B;
1483   c = matB->A; d = matB->B;
1484 
1485   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1486   if (flg == PETSC_TRUE) {
1487     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1488   }
1489   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatCopy_MPIAIJ"
1495 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1496 {
1497   int        ierr;
1498   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1499   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1500 
1501   PetscFunctionBegin;
1502   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1503   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1504     /* because of the column compression in the off-processor part of the matrix a->B,
1505        the number of columns in a->B and b->B may be different; hence we cannot call
1506        MatCopy() directly on the two parts. If need be, we could provide a more
1507        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1508        and then copying the submatrices */
1509     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1510   } else {
1511     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1512     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1513   }
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1519 int MatSetUpPreallocation_MPIAIJ(Mat A)
1520 {
1521   int        ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #include "petscblaslapack.h"
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatAXPY_MPIAIJ"
1531 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1532 {
1533   int        ierr,one=1,i;
1534   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1535   Mat_SeqAIJ *x,*y;
1536 
1537   PetscFunctionBegin;
1538   if (str == SAME_NONZERO_PATTERN) {
1539     x = (Mat_SeqAIJ *)xx->A->data;
1540     y = (Mat_SeqAIJ *)yy->A->data;
1541     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1542     x = (Mat_SeqAIJ *)xx->B->data;
1543     y = (Mat_SeqAIJ *)yy->B->data;
1544     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1545   } else if (str == SUBSET_NONZERO_PATTERN) {
1546     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1547 
1548     x = (Mat_SeqAIJ *)xx->B->data;
1549     y = (Mat_SeqAIJ *)yy->B->data;
1550     if (y->xtoy && y->XtoY != xx->B) {
1551       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1552       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1553     }
1554     if (!y->xtoy) { /* get xtoy */
1555       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1556       y->XtoY = xx->B;
1557     }
1558     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1559   } else {
1560     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 /* -------------------------------------------------------------------*/
1566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1567        MatGetRow_MPIAIJ,
1568        MatRestoreRow_MPIAIJ,
1569        MatMult_MPIAIJ,
1570 /* 4*/ MatMultAdd_MPIAIJ,
1571        MatMultTranspose_MPIAIJ,
1572        MatMultTransposeAdd_MPIAIJ,
1573        0,
1574        0,
1575        0,
1576 /*10*/ 0,
1577        0,
1578        0,
1579        MatRelax_MPIAIJ,
1580        MatTranspose_MPIAIJ,
1581 /*15*/ MatGetInfo_MPIAIJ,
1582        MatEqual_MPIAIJ,
1583        MatGetDiagonal_MPIAIJ,
1584        MatDiagonalScale_MPIAIJ,
1585        MatNorm_MPIAIJ,
1586 /*20*/ MatAssemblyBegin_MPIAIJ,
1587        MatAssemblyEnd_MPIAIJ,
1588        0,
1589        MatSetOption_MPIAIJ,
1590        MatZeroEntries_MPIAIJ,
1591 /*25*/ MatZeroRows_MPIAIJ,
1592 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1593        MatLUFactorSymbolic_MPIAIJ_TFS,
1594 #else
1595        0,
1596 #endif
1597        0,
1598        0,
1599        0,
1600 /*30*/ MatSetUpPreallocation_MPIAIJ,
1601        0,
1602        0,
1603        0,
1604        0,
1605 /*35*/ MatDuplicate_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609        0,
1610 /*40*/ MatAXPY_MPIAIJ,
1611        MatGetSubMatrices_MPIAIJ,
1612        MatIncreaseOverlap_MPIAIJ,
1613        MatGetValues_MPIAIJ,
1614        MatCopy_MPIAIJ,
1615 /*45*/ MatPrintHelp_MPIAIJ,
1616        MatScale_MPIAIJ,
1617        0,
1618        0,
1619        0,
1620 /*50*/ MatGetBlockSize_MPIAIJ,
1621        0,
1622        0,
1623        0,
1624        0,
1625 /*55*/ MatFDColoringCreate_MPIAIJ,
1626        0,
1627        MatSetUnfactored_MPIAIJ,
1628        0,
1629        0,
1630 /*60*/ MatGetSubMatrix_MPIAIJ,
1631        MatDestroy_MPIAIJ,
1632        MatView_MPIAIJ,
1633        MatGetPetscMaps_Petsc,
1634        0,
1635 /*65*/ 0,
1636        0,
1637        0,
1638        0,
1639        0,
1640 /*70*/ 0,
1641        0,
1642        MatSetColoring_MPIAIJ,
1643        MatSetValuesAdic_MPIAIJ,
1644        MatSetValuesAdifor_MPIAIJ,
1645 /*75*/ 0,
1646        0,
1647        0,
1648        0,
1649        0,
1650 /*80*/ 0,
1651        0,
1652        0,
1653        0,
1654 /*85*/ MatLoad_MPIAIJ};
1655 
1656 /* ----------------------------------------------------------------------------------------*/
1657 
1658 EXTERN_C_BEGIN
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1661 int MatStoreValues_MPIAIJ(Mat mat)
1662 {
1663   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1664   int        ierr;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1668   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 EXTERN_C_END
1672 
1673 EXTERN_C_BEGIN
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1676 int MatRetrieveValues_MPIAIJ(Mat mat)
1677 {
1678   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1679   int        ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1683   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 EXTERN_C_END
1687 
1688 #include "petscpc.h"
1689 EXTERN_C_BEGIN
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1692 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1693 {
1694   Mat_MPIAIJ   *b;
1695   int          ierr,i;
1696 
1697   PetscFunctionBegin;
1698   B->preallocated = PETSC_TRUE;
1699   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1700   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1701   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1702   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1703   if (d_nnz) {
1704     for (i=0; i<B->m; i++) {
1705       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1706     }
1707   }
1708   if (o_nnz) {
1709     for (i=0; i<B->m; i++) {
1710       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1711     }
1712   }
1713   b = (Mat_MPIAIJ*)B->data;
1714   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1715   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1716 
1717   PetscFunctionReturn(0);
1718 }
1719 EXTERN_C_END
1720 
1721 /*MC
1722    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1723 
1724    Options Database Keys:
1725 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1726 
1727   Level: beginner
1728 
1729 .seealso: MatCreateMPIAIJ
1730 M*/
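/*
   Illustrative sketch (not part of the library source): one way to pick up the
   "mpiaij" type from the options database key described above.  The communicator
   comm and the sizes m,n,M,N are assumed to be defined elsewhere; run the
   program with -mat_type mpiaij.

      Mat A;
      int ierr;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/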
1731 
1732 EXTERN_C_BEGIN
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatCreate_MPIAIJ"
1735 int MatCreate_MPIAIJ(Mat B)
1736 {
1737   Mat_MPIAIJ *b;
1738   int        ierr,i,size;
1739 
1740   PetscFunctionBegin;
1741   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1742 
1743   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1744   B->data         = (void*)b;
1745   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1746   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1747   B->factor       = 0;
1748   B->assembled    = PETSC_FALSE;
1749   B->mapping      = 0;
1750 
1751   B->insertmode      = NOT_SET_VALUES;
1752   b->size            = size;
1753   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1754 
1755   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1756   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1757 
1758   /* the information in the maps duplicates the information computed below, eventually
1759      we should remove the duplicate information that is not contained in the maps */
1760   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1761   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1762 
1763   /* build local table of row and column ownerships */
1764   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1765   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1766   b->cowners = b->rowners + b->size + 2;
1767   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1768   b->rowners[0] = 0;
1769   for (i=2; i<=b->size; i++) {
1770     b->rowners[i] += b->rowners[i-1];
1771   }
1772   b->rstart = b->rowners[b->rank];
1773   b->rend   = b->rowners[b->rank+1];
1774   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1775   b->cowners[0] = 0;
1776   for (i=2; i<=b->size; i++) {
1777     b->cowners[i] += b->cowners[i-1];
1778   }
1779   b->cstart = b->cowners[b->rank];
1780   b->cend   = b->cowners[b->rank+1];
1781 
1782   /* build cache for off array entries formed */
1783   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1784   b->donotstash  = PETSC_FALSE;
1785   b->colmap      = 0;
1786   b->garray      = 0;
1787   b->roworiented = PETSC_TRUE;
1788 
1789   /* stuff used for matrix vector multiply */
1790   b->lvec      = PETSC_NULL;
1791   b->Mvctx     = PETSC_NULL;
1792 
1793   /* stuff for MatGetRow() */
1794   b->rowindices   = 0;
1795   b->rowvalues    = 0;
1796   b->getrowactive = PETSC_FALSE;
1797 
1798   /* Explicitly create 2 MATSEQAIJ matrices. */
1799   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1800   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1801   PetscLogObjectParent(B,b->A);
1802   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1803   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1804   PetscLogObjectParent(B,b->B);
1805 
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1807                                      "MatStoreValues_MPIAIJ",
1808                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1810                                      "MatRetrieveValues_MPIAIJ",
1811                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1813 				     "MatGetDiagonalBlock_MPIAIJ",
1814                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1816 				     "MatIsTranspose_MPIAIJ",
1817 				     MatIsTranspose_MPIAIJ); CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1819 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1820 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1822 				     "MatDiagonalScaleLocal_MPIAIJ",
1823 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 EXTERN_C_END
1827 
1828 /*MC
1829    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1830 
1831    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1832    and MATMPIAIJ otherwise.
1833 
1834    Options Database Keys:
1835 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1836 
1837   Level: beginner
1838 
1839 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1840 M*/
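/*
   Illustrative sketch (not part of the library source): requesting the generic
   "aij" type directly; MatCreate_AIJ() below then resolves it to MATSEQAIJ or
   MATMPIAIJ depending on the communicator size.  comm, m, n, M, N are assumed
   to be defined elsewhere.

      Mat A;
      int ierr;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
*/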
1841 
1842 EXTERN_C_BEGIN
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatCreate_AIJ"
1845 int MatCreate_AIJ(Mat A) {
1846   int ierr,size;
1847 
1848   PetscFunctionBegin;
1849   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1850   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1851   if (size == 1) {
1852     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1853   } else {
1854     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 EXTERN_C_END
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1862 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1863 {
1864   Mat        mat;
1865   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1866   int        ierr;
1867 
1868   PetscFunctionBegin;
1869   *newmat       = 0;
1870   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1871   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1872   a    = (Mat_MPIAIJ*)mat->data;
1873   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1874   mat->factor       = matin->factor;
1875   mat->assembled    = PETSC_TRUE;
1876   mat->insertmode   = NOT_SET_VALUES;
1877   mat->preallocated = PETSC_TRUE;
1878 
1879   a->rstart       = oldmat->rstart;
1880   a->rend         = oldmat->rend;
1881   a->cstart       = oldmat->cstart;
1882   a->cend         = oldmat->cend;
1883   a->size         = oldmat->size;
1884   a->rank         = oldmat->rank;
1885   a->donotstash   = oldmat->donotstash;
1886   a->roworiented  = oldmat->roworiented;
1887   a->rowindices   = 0;
1888   a->rowvalues    = 0;
1889   a->getrowactive = PETSC_FALSE;
1890 
1891   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1892   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1893   if (oldmat->colmap) {
1894 #if defined (PETSC_USE_CTABLE)
1895     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1896 #else
1897     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1898     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1899     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1900 #endif
1901   } else a->colmap = 0;
1902   if (oldmat->garray) {
1903     int len;
1904     len  = oldmat->B->n;
1905     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1906     PetscLogObjectMemory(mat,len*sizeof(int));
1907     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1908   } else a->garray = 0;
1909 
1910   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1911   PetscLogObjectParent(mat,a->lvec);
1912   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1913   PetscLogObjectParent(mat,a->Mvctx);
1914   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1915   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1916   PetscLogObjectParent(mat,a->A);
1917   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1918   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1919   PetscLogObjectParent(mat,a->B);
1920   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1921   *newmat = mat;
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #include "petscsys.h"
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatLoad_MPIAIJ"
1929 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1930 {
1931   Mat          A;
1932   PetscScalar  *vals,*svals;
1933   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1934   MPI_Status   status;
1935   int          i,nz,ierr,j,rstart,rend,fd;
1936   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1937   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1938   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1939 
1940   PetscFunctionBegin;
1941   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1942   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1943   if (!rank) {
1944     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1945     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1946     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1947     if (header[3] < 0) {
1948       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1949     }
1950   }
1951 
1952   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1953   M = header[1]; N = header[2];
1954   /* determine ownership of all rows */
1955   m = M/size + ((M % size) > rank);
1956   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1957   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1958   rowners[0] = 0;
1959   for (i=2; i<=size; i++) {
1960     rowners[i] += rowners[i-1];
1961   }
1962   rstart = rowners[rank];
1963   rend   = rowners[rank+1];
1964 
1965   /* distribute row lengths to all processors */
1966   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1967   offlens = ourlens + (rend-rstart);
1968   if (!rank) {
1969     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1970     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1971     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1972     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1973     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1974     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1975   } else {
1976     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1977   }
1978 
1979   if (!rank) {
1980     /* calculate the number of nonzeros on each processor */
1981     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1982     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1983     for (i=0; i<size; i++) {
1984       for (j=rowners[i]; j< rowners[i+1]; j++) {
1985         procsnz[i] += rowlengths[j];
1986       }
1987     }
1988     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1989 
1990     /* determine max buffer needed and allocate it */
1991     maxnz = 0;
1992     for (i=0; i<size; i++) {
1993       maxnz = PetscMax(maxnz,procsnz[i]);
1994     }
1995     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1996 
1997     /* read in my part of the matrix column indices  */
1998     nz   = procsnz[0];
1999     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2000     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2001 
2002     /* read in everyone else's part of the matrix and ship it off */
2003     for (i=1; i<size; i++) {
2004       nz   = procsnz[i];
2005       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2006       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2007     }
2008     ierr = PetscFree(cols);CHKERRQ(ierr);
2009   } else {
2010     /* determine buffer space needed for message */
2011     nz = 0;
2012     for (i=0; i<m; i++) {
2013       nz += ourlens[i];
2014     }
2015     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2016 
2017     /* receive message of column indices*/
2018     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2019     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2020     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2021   }
2022 
2023   /* determine column ownership if matrix is not square */
2024   if (N != M) {
2025     n      = N/size + ((N % size) > rank);
2026     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2027     cstart = cend - n;
2028   } else {
2029     cstart = rstart;
2030     cend   = rend;
2031     n      = cend - cstart;
2032   }
2033 
2034   /* loop over local rows, determining number of off diagonal entries */
2035   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2036   jj = 0;
2037   for (i=0; i<m; i++) {
2038     for (j=0; j<ourlens[i]; j++) {
2039       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2040       jj++;
2041     }
2042   }
2043 
2044   /* create our matrix */
2045   for (i=0; i<m; i++) {
2046     ourlens[i] -= offlens[i];
2047   }
2048   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2049   ierr = MatSetType(A,type);CHKERRQ(ierr);
2050   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2051 
2052   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2053   for (i=0; i<m; i++) {
2054     ourlens[i] += offlens[i];
2055   }
2056 
2057   if (!rank) {
2058     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2059 
2060     /* read in my part of the matrix numerical values  */
2061     nz   = procsnz[0];
2062     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2063 
2064     /* insert into matrix */
2065     jj      = rstart;
2066     smycols = mycols;
2067     svals   = vals;
2068     for (i=0; i<m; i++) {
2069       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2070       smycols += ourlens[i];
2071       svals   += ourlens[i];
2072       jj++;
2073     }
2074 
2075     /* read in other processors and ship out */
2076     for (i=1; i<size; i++) {
2077       nz   = procsnz[i];
2078       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2079       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2080     }
2081     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2082   } else {
2083     /* receive numeric values */
2084     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2085 
2086     /* receive message of values*/
2087     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2088     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2089     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2090 
2091     /* insert into matrix */
2092     jj      = rstart;
2093     smycols = mycols;
2094     svals   = vals;
2095     for (i=0; i<m; i++) {
2096       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2097       smycols += ourlens[i];
2098       svals   += ourlens[i];
2099       jj++;
2100     }
2101   }
2102   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2103   ierr = PetscFree(vals);CHKERRQ(ierr);
2104   ierr = PetscFree(mycols);CHKERRQ(ierr);
2105   ierr = PetscFree(rowners);CHKERRQ(ierr);
2106 
2107   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2108   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2109   *newmat = A;
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2115 /*
2116     Not great since it makes two copies of the submatrix: first a SeqAIJ
2117   locally, and then the final result by concatenating the local matrices.
2118   Writing it directly would be much like MatGetSubMatrices_MPIAIJ(); a usage sketch follows this routine.
2119 */
2120 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2121 {
2122   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2123   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2124   Mat          *local,M,Mreuse;
2125   PetscScalar  *vwork,*aa;
2126   MPI_Comm     comm = mat->comm;
2127   Mat_SeqAIJ   *aij;
2128 
2129 
2130   PetscFunctionBegin;
2131   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2132   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2133 
2134   if (call ==  MAT_REUSE_MATRIX) {
2135     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2136     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2137     local = &Mreuse;
2138     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2139   } else {
2140     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2141     Mreuse = *local;
2142     ierr   = PetscFree(local);CHKERRQ(ierr);
2143   }
2144 
2145   /*
2146       m - number of local rows
2147       n - number of columns (same on all processors)
2148       rstart - first row in new global matrix generated
2149   */
2150   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2151   if (call == MAT_INITIAL_MATRIX) {
2152     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2153     ii  = aij->i;
2154     jj  = aij->j;
2155 
2156     /*
2157         Determine the number of non-zeros in the diagonal and off-diagonal
2158         portions of the matrix in order to do correct preallocation
2159     */
2160 
2161     /* first get start and end of "diagonal" columns */
2162     if (csize == PETSC_DECIDE) {
2163       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2164       if (mglobal == n) { /* square matrix */
2165 	nlocal = m;
2166       } else {
2167         nlocal = n/size + ((n % size) > rank);
2168       }
2169     } else {
2170       nlocal = csize;
2171     }
2172     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2173     rstart = rend - nlocal;
2174     if (rank == size - 1 && rend != n) {
2175       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2176     }
2177 
2178     /* next, compute all the lengths */
2179     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2180     olens = dlens + m;
2181     for (i=0; i<m; i++) {
2182       jend = ii[i+1] - ii[i];
2183       olen = 0;
2184       dlen = 0;
2185       for (j=0; j<jend; j++) {
2186         if (*jj < rstart || *jj >= rend) olen++;
2187         else dlen++;
2188         jj++;
2189       }
2190       olens[i] = olen;
2191       dlens[i] = dlen;
2192     }
2193     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2194     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2195     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2196     ierr = PetscFree(dlens);CHKERRQ(ierr);
2197   } else {
2198     int ml,nl;
2199 
2200     M = *newmat;
2201     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2202     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2203     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2204     /*
2205          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2206        rather than the slower MatSetValues().
2207     */
2208     M->was_assembled = PETSC_TRUE;
2209     M->assembled     = PETSC_FALSE;
2210   }
2211   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2212   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2213   ii  = aij->i;
2214   jj  = aij->j;
2215   aa  = aij->a;
2216   for (i=0; i<m; i++) {
2217     row   = rstart + i;
2218     nz    = ii[i+1] - ii[i];
2219     cwork = jj;     jj += nz;
2220     vwork = aa;     aa += nz;
2221     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2222   }
2223 
2224   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2225   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2226   *newmat = M;
2227 
2228   /* save submatrix used in processor for next request */
2229   if (call ==  MAT_INITIAL_MATRIX) {
2230     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2231     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2232   }
2233 
2234   PetscFunctionReturn(0);
2235 }
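/*
   Illustrative sketch (not part of the library source): how a caller typically
   drives the routine above, assuming the generic MatGetSubMatrix() interface
   mirrors the argument list above.  The index sets isrow and iscol are assumed
   to be created already; error checking is abbreviated.

      Mat sub;
      int ierr;
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

   The second call reuses the "SubMatrix" object composed on sub by the first
   call, so it must request the same size and layout as the original.
*/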
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2239 /*@C
2240    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2241    (the default parallel PETSc format).  For good matrix assembly performance
2242    the user should preallocate the matrix storage by setting the parameters
2243    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2244    performance can be increased by more than a factor of 50.
2245 
2246    Collective on MPI_Comm
2247 
2248    Input Parameters:
2249 +  A - the matrix
2250 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2251            (same value is used for all local rows)
2252 .  d_nnz - array containing the number of nonzeros in the various rows of the
2253            DIAGONAL portion of the local submatrix (possibly different for each row)
2254            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2255            The size of this array is equal to the number of local rows, i.e. 'm'.
2256            You must leave room for the diagonal entry even if it is zero.
2257 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2258            submatrix (same value is used for all local rows).
2259 -  o_nnz - array containing the number of nonzeros in the various rows of the
2260            OFF-DIAGONAL portion of the local submatrix (possibly different for
2261            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2262            structure. The size of this array is equal to the number
2263            of local rows, i.e. 'm'.
2264 
2265    The AIJ format (also called the Yale sparse matrix format or
2266    compressed row storage), is fully compatible with standard Fortran 77
2267    storage.  That is, the stored row and column indices can begin at
2268    either one (as in Fortran) or zero.  See the users manual for details.
2269 
2270    The user MUST specify either the local or global matrix dimensions
2271    (possibly both).
2272 
2273    The parallel matrix is partitioned such that the first m0 rows belong to
2274    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2275    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2276 
2277    The DIAGONAL portion of the local submatrix of a processor can be defined
2278    as the submatrix obtained by extracting the part corresponding
2279    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2280    first row that belongs to the processor, and r2 is the last row belonging
2281    to this processor. This is a square mxm matrix. The remaining portion
2282    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2283 
2284    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2285 
2286    By default, this format uses inodes (identical nodes) when possible.
2287    We search for consecutive rows with the same nonzero structure, thereby
2288    reusing matrix information to achieve increased efficiency.
2289 
2290    Options Database Keys:
2291 +  -mat_aij_no_inode  - Do not use inodes
2292 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2293 -  -mat_aij_oneindex - Internally use indexing starting at 1
2294         rather than 0.  Note that when calling MatSetValues(),
2295         the user still MUST index entries starting at 0!
2296 
2297    Example usage:
2298 
2299    Consider the following 8x8 matrix with 34 non-zero values, that is
2300    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2301    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2302    as follows:
2303 
2304 .vb
2305             1  2  0  |  0  3  0  |  0  4
2306     Proc0   0  5  6  |  7  0  0  |  8  0
2307             9  0 10  | 11  0  0  | 12  0
2308     -------------------------------------
2309            13  0 14  | 15 16 17  |  0  0
2310     Proc1   0 18  0  | 19 20 21  |  0  0
2311             0  0  0  | 22 23  0  | 24  0
2312     -------------------------------------
2313     Proc2  25 26 27  |  0  0 28  | 29  0
2314            30  0  0  | 31 32 33  |  0 34
2315 .ve
2316 
2317    This can be represented as a collection of submatrices as:
2318 
2319 .vb
2320       A B C
2321       D E F
2322       G H I
2323 .ve
2324 
2325    Where the submatrices A,B,C are owned by proc0, D,E,F are
2326    owned by proc1, G,H,I are owned by proc2.
2327 
2328    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2329    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2330    The 'M','N' parameters are 8,8, and have the same values on all procs.
2331 
2332    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2333    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2334    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2335    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2336    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2337    matrix, and [DF] as another SeqAIJ matrix.
2338 
2339    When d_nz, o_nz parameters are specified, d_nz storage elements are
2340    allocated for every row of the local diagonal submatrix, and o_nz
2341    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2342    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2343    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2344    In this case, the values of d_nz,o_nz are:
2345 .vb
2346      proc0 : dnz = 2, o_nz = 2
2347      proc1 : dnz = 3, o_nz = 2
2348      proc2 : dnz = 1, o_nz = 4
2349 .ve
2350    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2351    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2352    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2353    34 values.
2354 
2355    When d_nnz, o_nnz parameters are specified, the storage is specified
2356    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2357    In the above case the values for d_nnz,o_nnz are:
2358 .vb
2359      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2360      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2361      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2362 .ve
2363    Here the space allocated is the sum of all the above values, i.e., 34, and
2364    hence the preallocation is exact. A short calling sketch follows this routine.
2365 
2366    Level: intermediate
2367 
2368 .keywords: matrix, aij, compressed row, sparse, parallel
2369 
2370 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2371 @*/
2372 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2373 {
2374   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2375 
2376   PetscFunctionBegin;
2377   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2378   if (f) {
2379     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2380   }
2381   PetscFunctionReturn(0);
2382 }
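/*
   Illustrative sketch (not part of the library source): the preallocation for
   proc1 of the 8x8 example in the manual page above, using the per-row d_nnz
   and o_nnz arrays from that example.  comm is assumed to be the 3-process
   communicator of the example; error checking is abbreviated.

      Mat A;
      int ierr;
      int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/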
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatCreateMPIAIJ"
2386 /*@C
2387    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2388    (the default parallel PETSc format).  For good matrix assembly performance
2389    the user should preallocate the matrix storage by setting the parameters
2390    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2391    performance can be increased by more than a factor of 50.
2392 
2393    Collective on MPI_Comm
2394 
2395    Input Parameters:
2396 +  comm - MPI communicator
2397 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2398            This value should be the same as the local size used in creating the
2399            y vector for the matrix-vector product y = Ax.
2400 .  n - This value should be the same as the local size used in creating the
2401        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2402        calculated if N is given) For square matrices n is almost always m.
2403 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2404 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2405 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2406            (same value is used for all local rows)
2407 .  d_nnz - array containing the number of nonzeros in the various rows of the
2408            DIAGONAL portion of the local submatrix (possibly different for each row)
2409            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2410            The size of this array is equal to the number of local rows, i.e. 'm'.
2411            You must leave room for the diagonal entry even if it is zero.
2412 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2413            submatrix (same value is used for all local rows).
2414 -  o_nnz - array containing the number of nonzeros in the various rows of the
2415            OFF-DIAGONAL portion of the local submatrix (possibly different for
2416            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2417            structure. The size of this array is equal to the number
2418            of local rows, i.e. 'm'.
2419 
2420    Output Parameter:
2421 .  A - the matrix
2422 
2423    Notes:
2424    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2425    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2426    storage requirements for this matrix.
2427 
2428    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2429    processor then it must be used on all processors that share the object for
2430    that argument.
2431 
2432    The AIJ format (also called the Yale sparse matrix format or
2433    compressed row storage), is fully compatible with standard Fortran 77
2434    storage.  That is, the stored row and column indices can begin at
2435    either one (as in Fortran) or zero.  See the users manual for details.
2436 
2437    The user MUST specify either the local or global matrix dimensions
2438    (possibly both).
2439 
2440    The parallel matrix is partitioned such that the first m0 rows belong to
2441    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2442    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2443 
2444    The DIAGONAL portion of the local submatrix of a processor can be defined
2445    as the submatrix obtained by extracting the part corresponding
2446    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2447    first row that belongs to the processor, and r2 is the last row belonging
2448    to this processor. This is a square mxm matrix. The remaining portion
2449    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2450 
2451    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2452 
2453    When calling this routine with a single process communicator, a matrix of
2454    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2455    type of communicator, use the construction mechanism:
2456      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2457 
2458    By default, this format uses inodes (identical nodes) when possible.
2459    We search for consecutive rows with the same nonzero structure, thereby
2460    reusing matrix information to achieve increased efficiency.
2461 
2462    Options Database Keys:
2463 +  -mat_aij_no_inode  - Do not use inodes
2464 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2465 -  -mat_aij_oneindex - Internally use indexing starting at 1
2466         rather than 0.  Note that when calling MatSetValues(),
2467         the user still MUST index entries starting at 0!
2468 
2469 
2470    Example usage:
2471 
2472    Consider the following 8x8 matrix with 34 non-zero values, that is
2473    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2474    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2475    as follows:
2476 
2477 .vb
2478             1  2  0  |  0  3  0  |  0  4
2479     Proc0   0  5  6  |  7  0  0  |  8  0
2480             9  0 10  | 11  0  0  | 12  0
2481     -------------------------------------
2482            13  0 14  | 15 16 17  |  0  0
2483     Proc1   0 18  0  | 19 20 21  |  0  0
2484             0  0  0  | 22 23  0  | 24  0
2485     -------------------------------------
2486     Proc2  25 26 27  |  0  0 28  | 29  0
2487            30  0  0  | 31 32 33  |  0 34
2488 .ve
2489 
2490    This can be represented as a collection of submatrices as:
2491 
2492 .vb
2493       A B C
2494       D E F
2495       G H I
2496 .ve
2497 
2498    Where the submatrices A,B,C are owned by proc0, D,E,F are
2499    owned by proc1, G,H,I are owned by proc2.
2500 
2501    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2502    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2503    The 'M','N' parameters are 8,8, and have the same values on all procs.
2504 
2505    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2506    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2507    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2508    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2509    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2510    matrix, and [DF] as another SeqAIJ matrix.
2511 
2512    When d_nz, o_nz parameters are specified, d_nz storage elements are
2513    allocated for every row of the local diagonal submatrix, and o_nz
2514    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2515    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2516    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2517    In this case, the values of d_nz,o_nz are:
2518 .vb
2519      proc0 : dnz = 2, o_nz = 2
2520      proc1 : dnz = 3, o_nz = 2
2521      proc2 : dnz = 1, o_nz = 4
2522 .ve
2523    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2524    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2525    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2526    34 values.
2527 
2528    When d_nnz, o_nnz parameters are specified, the storage is specified
2529    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2530    In the above case the values for d_nnz,o_nnz are:
2531 .vb
2532      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2533      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2534      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2535 .ve
2536    Here the space allocated is the sum of all the above values, i.e., 34, and
2537    hence the preallocation is exact. A short calling sketch follows this routine.
2538 
2539    Level: intermediate
2540 
2541 .keywords: matrix, aij, compressed row, sparse, parallel
2542 
2543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2544 @*/
2545 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2546 {
2547   int ierr,size;
2548 
2549   PetscFunctionBegin;
2550   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2551   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2552   if (size > 1) {
2553     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2554     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2555   } else {
2556     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2557     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2558   }
2559   PetscFunctionReturn(0);
2560 }
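/*
   Illustrative sketch (not part of the library source): creating the 8x8 example
   from the manual page above in a single call, written from proc0's point of
   view (local sizes 3x3, per-row counts taken from the example).  comm is the
   3-process communicator of the example; error checking is abbreviated.

      Mat A;
      int ierr;
      int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
*/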
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2564 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2565 {
2566   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2567   PetscFunctionBegin;
2568   *Ad     = a->A;
2569   *Ao     = a->B;
2570   *colmap = a->garray;
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 #undef __FUNCT__
2575 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2576 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2577 {
2578   int        ierr,i;
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2580 
2581   PetscFunctionBegin;
2582   if (coloring->ctype == IS_COLORING_LOCAL) {
2583     ISColoringValue *allcolors,*colors;
2584     ISColoring      ocoloring;
2585 
2586     /* set coloring for diagonal portion */
2587     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2588 
2589     /* set coloring for off-diagonal portion */
2590     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2591     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2592     for (i=0; i<a->B->n; i++) {
2593       colors[i] = allcolors[a->garray[i]];
2594     }
2595     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2596     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2597     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2598     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2599   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2600     ISColoringValue *colors;
2601     int             *larray;
2602     ISColoring      ocoloring;
2603 
2604     /* set coloring for diagonal portion */
2605     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2606     for (i=0; i<a->A->n; i++) {
2607       larray[i] = i + a->cstart;
2608     }
2609     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2610     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2611     for (i=0; i<a->A->n; i++) {
2612       colors[i] = coloring->colors[larray[i]];
2613     }
2614     ierr = PetscFree(larray);CHKERRQ(ierr);
2615     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2616     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2617     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2618 
2619     /* set coloring for off-diagonal portion */
2620     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2621     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2622     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2623     for (i=0; i<a->B->n; i++) {
2624       colors[i] = coloring->colors[larray[i]];
2625     }
2626     ierr = PetscFree(larray);CHKERRQ(ierr);
2627     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2628     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2629     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2630   } else {
2631     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2632   }
2633 
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 #undef __FUNCT__
2638 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2639 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2640 {
2641   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2642   int        ierr;
2643 
2644   PetscFunctionBegin;
2645   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2646   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2652 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2653 {
2654   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2655   int        ierr;
2656 
2657   PetscFunctionBegin;
2658   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2659   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 #undef __FUNCT__
2664 #define __FUNCT__ "MatMerge"
2665 /*@C
2666       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2667                  matrices from each processor
2668 
2669     Collective on MPI_Comm
2670 
2671    Input Parameters:
2672 +    comm - the communicator the parallel matrix will live on
2673 -    inmat - the input sequential matrices
2674 
2675    Output Parameter:
2676 .    outmat - the parallel matrix generated
2677 
2678     Level: advanced
2679 
2680    Notes: The number of columns of the matrix on EACH of the separate processes
2681       MUST be the same. A short calling sketch follows this routine.
2682 
2683 @*/
2684 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2685 {
2686   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2687   PetscScalar *values;
2688   PetscMap    columnmap,rowmap;
2689 
2690   PetscFunctionBegin;
2691 
2692   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2693 
2694   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2695   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2696   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2697   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2698   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2699   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2700 
2701   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2702   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2703   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2704   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2705   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2706 
2707   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2708   for (i=0;i<m;i++) {
2709     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2710     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2711     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2712   }
2713   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2714   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2715 
2716   for (i=0;i<m;i++) {
2717     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2718     I    = i + rstart;
2719     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2720     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2721   }
2722   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2723   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725 
2726   PetscFunctionReturn(0);
2727 }
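/*
   Illustrative sketch (not part of the library source): merging per-process
   sequential pieces into one parallel matrix.  The local matrix inmat is assumed
   to exist on every process with the same number of columns; note that, as the
   code above shows, MatMerge() destroys the input matrix.

      Mat outmat;
      int ierr;
      ierr = MatMerge(PETSC_COMM_WORLD,inmat,&outmat);CHKERRQ(ierr);
*/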
2728 
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatFileSplit"
2731 int MatFileSplit(Mat A,char *outfile)
2732 {
2733   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2734   PetscViewer out;
2735   char        *name;
2736   Mat         B;
2737   PetscScalar *values;
2738 
2739   PetscFunctionBegin;
2740 
2741   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2742   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2743   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2744   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2745   for (i=0;i<m;i++) {
2746     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2747     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2748     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2749   }
2750   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2751   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2752 
2753   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2754   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2755   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2756   sprintf(name,"%s.%d",outfile,rank);
2757   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2758   ierr = PetscFree(name);CHKERRQ(ierr);
2759   ierr = MatView(B,out);CHKERRQ(ierr);
2760   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2761   ierr = MatDestroy(B);CHKERRQ(ierr);
2762   PetscFunctionReturn(0);
2763 }
2764