xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e32c5f6eb58f4e49912dd3fbd0ec2f3434bff94f)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 holds an order N integer array) but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
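/*
   Illustrative sketch (not part of the library source): once the colmap built
   above exists, translating a global column number into the local column index
   of the off-diagonal block B follows the pattern below; the same idiom appears
   in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() further down.  The helper
   name GlobalToLocalColumn_Sketch is hypothetical.
*/
#if 0
static int GlobalToLocalColumn_Sketch(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (!aij->colmap) { /* build the map lazily, exactly as the callers below do */
    ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
  }
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr); /* entries stored shifted by one so 0 means "absent" */
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;  /* a result of -1 signals that gcol does not occur in B */
#endif
  PetscFunctionReturn(0);
}
#endif
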
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col1) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col1) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
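/*
   The two macros above implement the same insertion scheme: a bisection step
   narrows the search window in the (sorted) row, a short linear scan finds the
   column, an existing entry is overwritten or added to, and otherwise the tail
   of the row is shifted up by one slot and the new entry inserted, with the
   whole storage enlarged by CHUNKSIZE entries when a row is full.  A minimal
   single-row sketch of that insert logic (ignoring the reallocation path and
   the Fortran index shift) is given below; InsertSorted_Sketch is hypothetical
   and assumes the arrays have room for one more entry.
*/
#if 0
static void InsertSorted_Sketch(int *cols,PetscScalar *vals,int *nrow,int col,PetscScalar value,InsertMode addv)
{
  int low = 0,high = *nrow,t,i;

  while (high - low > 5) {            /* bisection narrows the window containing col */
    t = (low + high)/2;
    if (cols[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {          /* short linear scan inside the window */
    if (cols[i] > col) break;
    if (cols[i] == col) {
      if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
      return;
    }
  }
  for (t=*nrow-1; t>=i; t--) {        /* shift later entries up to open slot i */
    cols[t+1] = cols[t]; vals[t+1] = vals[t];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
}
#endif
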
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private macros */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
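/*
   Caller-side sketch (illustrative only; sizes and values are made up): entries
   whose rows are owned locally go straight into the diagonal (A) or off-diagonal
   (B) sequential blocks via the macros above, while entries for rows owned by
   other processes are stashed and only communicated during assembly.
*/
#if 0
static int MatSetValuesUsage_Sketch(MPI_Comm comm)
{
  Mat         M;
  int         ierr,rstart,rend,i,col;
  PetscScalar v = 1.0;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&M);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    col  = (i+1) % 100;   /* a diagonal entry plus one that may land in the off-diagonal block */
    ierr = MatSetValues(M,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatSetValues(M,1,&i,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* any entries set for rows owned elsewhere travel through the stash here */
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(M);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
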
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412 #if defined(PETSC_HAVE_SUPERLUDIST)
413   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
414   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
415 #endif
416 
417 #if defined(PETSC_HAVE_SPOOLES)
418   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
419   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
420 #endif
421 
422 #if defined(PETSC_HAVE_MUMPS)
423   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
424   if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
431 int MatZeroEntries_MPIAIJ(Mat A)
432 {
433   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
434   int        ierr;
435 
436   PetscFunctionBegin;
437   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
438   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatZeroRows_MPIAIJ"
444 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
445 {
446   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
447   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
448   int            *nprocs,j,idx,nsends,row;
449   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
450   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
451   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
452   MPI_Comm       comm = A->comm;
453   MPI_Request    *send_waits,*recv_waits;
454   MPI_Status     recv_status,*send_status;
455   IS             istmp;
456   PetscTruth     found;
457 
458   PetscFunctionBegin;
459   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
460   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
461 
462   /*  first count number of contributors to each processor */
463   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
464   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
465   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
466   for (i=0; i<N; i++) {
467     idx = rows[i];
468     found = PETSC_FALSE;
469     for (j=0; j<size; j++) {
470       if (idx >= owners[j] && idx < owners[j+1]) {
471         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
472       }
473     }
474     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
475   }
476   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
477 
478   /* inform other processors of number of messages and max length*/
479   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
480 
481   /* post receives:   */
482   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
484   for (i=0; i<nrecvs; i++) {
485     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
486   }
487 
488   /* do sends:
489       1) starts[i] gives the starting index in svalues for stuff going to
490          the ith processor
491   */
492   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
493   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
494   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
495   starts[0] = 0;
496   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
497   for (i=0; i<N; i++) {
498     svalues[starts[owner[i]]++] = rows[i];
499   }
500   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
501 
502   starts[0] = 0;
503   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
504   count = 0;
505   for (i=0; i<size; i++) {
506     if (nprocs[2*i+1]) {
507       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
508     }
509   }
510   ierr = PetscFree(starts);CHKERRQ(ierr);
511 
512   base = owners[rank];
513 
514   /*  wait on receives */
515   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
516   source = lens + nrecvs;
517   count  = nrecvs; slen = 0;
518   while (count) {
519     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
520     /* unpack receives into our local space */
521     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
522     source[imdex]  = recv_status.MPI_SOURCE;
523     lens[imdex]    = n;
524     slen          += n;
525     count--;
526   }
527   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
528 
529   /* move the data into the send scatter */
530   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
531   count = 0;
532   for (i=0; i<nrecvs; i++) {
533     values = rvalues + i*nmax;
534     for (j=0; j<lens[i]; j++) {
535       lrows[count++] = values[j] - base;
536     }
537   }
538   ierr = PetscFree(rvalues);CHKERRQ(ierr);
539   ierr = PetscFree(lens);CHKERRQ(ierr);
540   ierr = PetscFree(owner);CHKERRQ(ierr);
541   ierr = PetscFree(nprocs);CHKERRQ(ierr);
542 
543   /* actually zap the local rows */
544   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
545   PetscLogObjectParent(A,istmp);
546 
547   /*
548         Zero the required rows. If the "diagonal block" of the matrix
549      is square and the user wishes to set the diagonal, we use separate
550      code so that MatSetValues() is not called for each diagonal entry,
551      which would allocate new memory on every call and slow things down.
552 
553        Contributed by: Mathew Knepley
554   */
555   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
556   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
557   if (diag && (l->A->M == l->A->N)) {
558     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
559   } else if (diag) {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
562       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
564     }
565     for (i = 0; i < slen; i++) {
566       row  = lrows[i] + rstart;
567       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
568     }
569     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571   } else {
572     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
573   }
574   ierr = ISDestroy(istmp);CHKERRQ(ierr);
575   ierr = PetscFree(lrows);CHKERRQ(ierr);
576 
577   /* wait on sends */
578   if (nsends) {
579     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
580     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
581     ierr = PetscFree(send_status);CHKERRQ(ierr);
582   }
583   ierr = PetscFree(send_waits);CHKERRQ(ierr);
584   ierr = PetscFree(svalues);CHKERRQ(ierr);
585 
586   PetscFunctionReturn(0);
587 }
588 
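/*
   Caller-side sketch of the interface implemented above, using the IS-based
   MatZeroRows() signature of this PETSc generation: each process lists the
   global rows it wants zeroed (they need not be locally owned; the routine
   routes them to their owners), plus an optional value for the diagonal.
   The row chosen below is arbitrary and the helper name is hypothetical.
*/
#if 0
static int MatZeroRowsUsage_Sketch(Mat M)
{
  IS          rows;
  int         ierr,rstart,rend,r[1];
  PetscScalar diag = 1.0;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  r[0] = rstart;  /* zero the first locally owned row */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,1,r,&rows);CHKERRQ(ierr);
  ierr = MatZeroRows(M,rows,&diag);CHKERRQ(ierr); /* pass a null diag to leave zeroed rows without a diagonal entry */
  ierr = ISDestroy(rows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
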
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMult_MPIAIJ"
591 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   int        ierr,nt;
595 
596   PetscFunctionBegin;
597   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
598   if (nt != A->n) {
599     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
600   }
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
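/*
   The multiply above computes y = A_d*x_local + B_o*x_ghost, where A_d is the
   local diagonal block, B_o the off-diagonal block, and x_ghost the off-process
   entries of x gathered into a->lvec; the scatter is started before the local
   product so communication overlaps computation.  A usage sketch with conforming
   vector layouts (the helper name and sizes are illustrative):
*/
#if 0
static int MatMultUsage_Sketch(Mat M)
{
  Vec         x,y;
  int         ierr,m,n;
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(M,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(M->comm,n,PETSC_DETERMINE,&x);CHKERRQ(ierr); /* x matches the column layout */
  ierr = VecCreateMPI(M->comm,m,PETSC_DETERMINE,&y);CHKERRQ(ierr); /* y matches the row layout */
  ierr = VecSet(&one,x);CHKERRQ(ierr);
  ierr = MatMult(M,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
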
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultAdd_MPIAIJ"
610 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
611 {
612   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
613   int        ierr;
614 
615   PetscFunctionBegin;
616   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
617   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
618   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
619   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
625 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
626 {
627   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
628   int        ierr;
629 
630   PetscFunctionBegin;
631   /* do nondiagonal part */
632   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
633   /* send it on its way */
634   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635   /* do local part */
636   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
637   /* receive remote parts: note this assumes the values are not actually */
638   /* inserted into yy until VecScatterEnd() below, which is true for this */
639   /* implementation but may not always be the case. */
640   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 EXTERN_C_BEGIN
645 #undef __FUNCT__
646 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
647 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
648 {
649   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
650   Mat        A = Aij->A, B,Aoff = Aij->B,Boff;
651   MatType    type;
652   IS         Me,Notme;
653   int        M,N,first,last,*notme,i, ierr;
654 
655   PetscFunctionBegin;
656 
657   /* Compatible types */
658   ierr = MatGetType(Bmat,&type); CHKERRQ(ierr);
659   ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr);
660   if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");
661 
662   /* Easy test: symmetric diagonal block */
663   Bij = (Mat_MPIAIJ *) Bmat->data; B = Bij->B;
664   ierr = MatIsSymmetric(A,B,f); CHKERRQ(ierr);
665   if (!*f) PetscFunctionReturn(0);
666 
667   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
668   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
669   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
670   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
671   for (i=0; i<first; i++) notme[i] = i;
672   for (i=last; i<M; i++) notme[i-last+first] = i;
673   ierr = ISCreateGeneral
674     (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
675   ierr = ISCreateStride
676     (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
677   ierr = MatGetSubMatrix
678     (B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff); CHKERRQ(ierr);
679   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
680   ierr = MatDestroy(Boff); CHKERRQ(ierr);
681   ierr = ISDestroy(Me); CHKERRQ(ierr);
682   ierr = ISDestroy(Notme); CHKERRQ(ierr);
683 
684   PetscFunctionReturn(0);
685 }
686 EXTERN_C_END
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
690 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
691 {
692   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696   /* do nondiagonal part */
697   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
698   /* send it on its way */
699   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
700   /* do local part */
701   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
702   /* receive remote parts: note this assumes the values are not actually */
703   /* inserted into zz until VecScatterEnd() below, which is true for this */
704   /* implementation but may not always be the case. */
705   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 /*
710   This only works correctly for square matrices where the subblock A->A is the
711    diagonal block
712 */
713 #undef __FUNCT__
714 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
715 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
716 {
717   int        ierr;
718   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
719 
720   PetscFunctionBegin;
721   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
722   if (a->rstart != a->cstart || a->rend != a->cend) {
723     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
724   }
725   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatScale_MPIAIJ"
731 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
732 {
733   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
734   int        ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
738   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDestroy_MPIAIJ"
744 int MatDestroy_MPIAIJ(Mat mat)
745 {
746   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
747   int        ierr;
748 
749   PetscFunctionBegin;
750 #if defined(PETSC_USE_LOG)
751   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
752 #endif
753   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
754   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
755   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
756   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
757 #if defined (PETSC_USE_CTABLE)
758   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
759 #else
760   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
761 #endif
762   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
763   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
764   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
765   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
766   ierr = PetscFree(aij);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769 
770 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
771 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPIAIJ_Binary"
775 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
776 {
777   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
778   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
779   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
780   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
781   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
782   PetscScalar       *column_values;
783 
784   PetscFunctionBegin;
785   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
786   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
787   nz   = A->nz + B->nz;
788   if (rank == 0) {
789     header[0] = MAT_FILE_COOKIE;
790     header[1] = mat->M;
791     header[2] = mat->N;
792     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
793     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
794     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
795     /* get largest number of rows any processor has */
796     rlen = mat->m;
797     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
798     for (i=1; i<size; i++) {
799       rlen = PetscMax(rlen,range[i+1] - range[i]);
800     }
801   } else {
802     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
803     rlen = mat->m;
804   }
805 
806   /* load up the local row counts */
807   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
808   for (i=0; i<mat->m; i++) {
809     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
810   }
811 
812   /* store the row lengths to the file */
813   if (rank == 0) {
814     MPI_Status status;
815     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
816     for (i=1; i<size; i++) {
817       rlen = range[i+1] - range[i];
818       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
819       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
820     }
821   } else {
822     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
823   }
824   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
825 
826   /* load up the local column indices */
827   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
828   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
829   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
830   cnt  = 0;
831   for (i=0; i<mat->m; i++) {
832     for (j=B->i[i]; j<B->i[i+1]; j++) {
833       if ( (col = garray[B->j[j]]) > cstart) break;
834       column_indices[cnt++] = col;
835     }
836     for (k=A->i[i]; k<A->i[i+1]; k++) {
837       column_indices[cnt++] = A->j[k] + cstart;
838     }
839     for (; j<B->i[i+1]; j++) {
840       column_indices[cnt++] = garray[B->j[j]];
841     }
842   }
843   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
844 
845   /* store the column indices to the file */
846   if (rank == 0) {
847     MPI_Status status;
848     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
849     for (i=1; i<size; i++) {
850       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
851       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
852       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
854     }
855   } else {
856     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
857     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
858   }
859   ierr = PetscFree(column_indices);CHKERRQ(ierr);
860 
861   /* load up the local column values */
862   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
863   cnt  = 0;
864   for (i=0; i<mat->m; i++) {
865     for (j=B->i[i]; j<B->i[i+1]; j++) {
866       if ( garray[B->j[j]] > cstart) break;
867       column_values[cnt++] = B->a[j];
868     }
869     for (k=A->i[i]; k<A->i[i+1]; k++) {
870       column_values[cnt++] = A->a[k];
871     }
872     for (; j<B->i[i+1]; j++) {
873       column_values[cnt++] = B->a[j];
874     }
875   }
876   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
877 
878   /* store the column values to the file */
879   if (rank == 0) {
880     MPI_Status status;
881     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
882     for (i=1; i<size; i++) {
883       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
884       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
885       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
886       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
887     }
888   } else {
889     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
890     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
891   }
892   ierr = PetscFree(column_values);CHKERRQ(ierr);
893   PetscFunctionReturn(0);
894 }
895 
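/*
   For reference, the on-disk layout produced by MatView_MPIAIJ_Binary() above,
   followed by a sketch of reading the header back.  The layout mirrors the
   writes above; the reader function itself is only an illustration and assumes
   fd refers to an already opened PETSc binary file.

     int    header[4]   = {MAT_FILE_COOKIE, M, N, total number of nonzeros}
     int    rowlen[M]     global row lengths (diagonal plus off-diagonal parts)
     int    cols[nz]      global column indices, row by row, ascending in each row
     scalar vals[nz]      numerical values in the same order as cols[]
*/
#if 0
static int ReadAIJHeader_Sketch(int fd,int *M,int *N,int *nz)
{
  int ierr,header[4];

  PetscFunctionBegin;
  ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_COOKIE) SETERRQ(1,"Not a PETSc matrix file");
  *M = header[1]; *N = header[2]; *nz = header[3];
  PetscFunctionReturn(0);
}
#endif
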
896 #undef __FUNCT__
897 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
898 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
899 {
900   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
901   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
902   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
903   PetscTruth        isdraw,isascii,flg,isbinary;
904   PetscViewer       sviewer;
905   PetscViewerFormat format;
906 
907   PetscFunctionBegin;
908   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
909   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
911   if (isascii) {
912     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
913     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
914       MatInfo info;
915       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
916       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
917       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
918       if (flg) {
919         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
920 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
921       } else {
922         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
923 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
924       }
925       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
926       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
927       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
928       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
929       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
930       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
931       PetscFunctionReturn(0);
932     } else if (format == PETSC_VIEWER_ASCII_INFO) {
933       PetscFunctionReturn(0);
934     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
935 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
936       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
937 #endif
938 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
939       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
940 #endif
941 
942       PetscFunctionReturn(0);
943     }
944   } else if (isbinary) {
945     if (size == 1) {
946       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
947       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
948     } else {
949       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
950     }
951     PetscFunctionReturn(0);
952   } else if (isdraw) {
953     PetscDraw  draw;
954     PetscTruth isnull;
955     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
956     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
957   }
958 
959   if (size == 1) {
960     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
961     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
962   } else {
963     /* assemble the entire matrix onto first processor. */
964     Mat         A;
965     Mat_SeqAIJ *Aloc;
966     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
967     PetscScalar *a;
968 
969     if (!rank) {
970       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
971     } else {
972       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
973     }
974     PetscLogObjectParent(mat,A);
975 
976     /* copy over the A part */
977     Aloc = (Mat_SeqAIJ*)aij->A->data;
978     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
979     row = aij->rstart;
980     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
981     for (i=0; i<m; i++) {
982       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
983       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
984     }
985     aj = Aloc->j;
986     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
987 
988     /* copy over the B part */
989     Aloc = (Mat_SeqAIJ*)aij->B->data;
990     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
991     row  = aij->rstart;
992     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
993     ct   = cols;
994     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
995     for (i=0; i<m; i++) {
996       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
997       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
998     }
999     ierr = PetscFree(ct);CHKERRQ(ierr);
1000     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1001     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1002     /*
1003        Everyone has to call to draw the matrix since the graphics waits are
1004        synchronized across all processors that share the PetscDraw object
1005     */
1006     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1007     if (!rank) {
1008       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1009       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1010     }
1011     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1012     ierr = MatDestroy(A);CHKERRQ(ierr);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatView_MPIAIJ"
1019 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1020 {
1021   int        ierr;
1022   PetscTruth isascii,isdraw,issocket,isbinary;
1023 
1024   PetscFunctionBegin;
1025   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1026   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1027   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1028   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1029   if (isascii || isdraw || isbinary || issocket) {
1030     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1031   } else {
1032     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1033   }
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatRelax_MPIAIJ"
1041 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1042 {
1043   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1044   int          ierr;
1045   Vec          bb1;
1046   PetscScalar  mone=-1.0;
1047 
1048   PetscFunctionBegin;
1049   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1050 
1051   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1052 
1053   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1054     if (flag & SOR_ZERO_INITIAL_GUESS) {
1055       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1056       its--;
1057     }
1058 
1059     while (its--) {
1060       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1061       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1062 
1063       /* update rhs: bb1 = bb - B*x */
1064       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1065       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1066 
1067       /* local sweep */
1068       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1069       CHKERRQ(ierr);
1070     }
1071   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1072     if (flag & SOR_ZERO_INITIAL_GUESS) {
1073       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1074       its--;
1075     }
1076     while (its--) {
1077       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1078       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1079 
1080       /* update rhs: bb1 = bb - B*x */
1081       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1082       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1083 
1084       /* local sweep */
1085       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1086       CHKERRQ(ierr);
1087     }
1088   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1089     if (flag & SOR_ZERO_INITIAL_GUESS) {
1090       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1091       its--;
1092     }
1093     while (its--) {
1094       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1095       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1096 
1097       /* update rhs: bb1 = bb - B*x */
1098       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1099       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1100 
1101       /* local sweep */
1102       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1103       CHKERRQ(ierr);
1104     }
1105   } else {
1106     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1107   }
1108 
1109   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
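/*
   In the local-sweep branches above each outer iteration applies a block
   update: the ghost values of the current iterate are gathered, the right-hand
   side is corrected for the off-diagonal coupling, and a sequential SOR sweep
   is run on the diagonal block.  Schematically (a sketch of the k-th outer
   iteration, not library notation):

       bb1     = bb - B_o * x^k_ghost
       x^{k+1} = SOR(A_d, bb1, omega, lits local sweeps, initial guess x^k)

   so with its = 1 and SOR_ZERO_INITIAL_GUESS this reduces to one sweep of
   processor-local SOR, i.e. block Jacobi across processes with SOR inside
   each block.
*/
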
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1115 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1116 {
1117   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1118   Mat        A = mat->A,B = mat->B;
1119   int        ierr;
1120   PetscReal  isend[5],irecv[5];
1121 
1122   PetscFunctionBegin;
1123   info->block_size     = 1.0;
1124   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1125   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1126   isend[3] = info->memory;  isend[4] = info->mallocs;
1127   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1128   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1129   isend[3] += info->memory;  isend[4] += info->mallocs;
1130   if (flag == MAT_LOCAL) {
1131     info->nz_used      = isend[0];
1132     info->nz_allocated = isend[1];
1133     info->nz_unneeded  = isend[2];
1134     info->memory       = isend[3];
1135     info->mallocs      = isend[4];
1136   } else if (flag == MAT_GLOBAL_MAX) {
1137     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1138     info->nz_used      = irecv[0];
1139     info->nz_allocated = irecv[1];
1140     info->nz_unneeded  = irecv[2];
1141     info->memory       = irecv[3];
1142     info->mallocs      = irecv[4];
1143   } else if (flag == MAT_GLOBAL_SUM) {
1144     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1145     info->nz_used      = irecv[0];
1146     info->nz_allocated = irecv[1];
1147     info->nz_unneeded  = irecv[2];
1148     info->memory       = irecv[3];
1149     info->mallocs      = irecv[4];
1150   }
1151   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1152   info->fill_ratio_needed = 0;
1153   info->factor_mallocs    = 0;
1154   info->rows_global       = (double)matin->M;
1155   info->columns_global    = (double)matin->N;
1156   info->rows_local        = (double)matin->m;
1157   info->columns_local     = (double)matin->N;
1158 
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatSetOption_MPIAIJ"
1164 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1167   int        ierr;
1168 
1169   PetscFunctionBegin;
1170   switch (op) {
1171   case MAT_NO_NEW_NONZERO_LOCATIONS:
1172   case MAT_YES_NEW_NONZERO_LOCATIONS:
1173   case MAT_COLUMNS_UNSORTED:
1174   case MAT_COLUMNS_SORTED:
1175   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1176   case MAT_KEEP_ZEROED_ROWS:
1177   case MAT_NEW_NONZERO_LOCATION_ERR:
1178   case MAT_USE_INODES:
1179   case MAT_DO_NOT_USE_INODES:
1180   case MAT_IGNORE_ZERO_ENTRIES:
1181     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1182     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1183     break;
1184   case MAT_ROW_ORIENTED:
1185     a->roworiented = PETSC_TRUE;
1186     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1187     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1188     break;
1189   case MAT_ROWS_SORTED:
1190   case MAT_ROWS_UNSORTED:
1191   case MAT_YES_NEW_DIAGONALS:
1192     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1193     break;
1194   case MAT_COLUMN_ORIENTED:
1195     a->roworiented = PETSC_FALSE;
1196     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1197     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1198     break;
1199   case MAT_IGNORE_OFF_PROC_ENTRIES:
1200     a->donotstash = PETSC_TRUE;
1201     break;
1202   case MAT_NO_NEW_DIAGONALS:
1203     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1204   default:
1205     SETERRQ(PETSC_ERR_SUP,"unknown option");
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
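/*
   Caller-side sketch of the options handled above, using the two-argument
   MatSetOption() of this PETSc generation; the combination shown and the
   helper name are illustrative only.
*/
#if 0
static int MatSetOptionUsage_Sketch(Mat M)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(M,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);  /* sets donotstash: off-process entries are dropped */
  ierr = MatSetOption(M,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr); /* forwarded to both the A and B blocks */
  PetscFunctionReturn(0);
}
#endif
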
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatGetRow_MPIAIJ"
1212 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1213 {
1214   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1215   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1216   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1217   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1218   int          *cmap,*idx_p;
1219 
1220   PetscFunctionBegin;
1221   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1222   mat->getrowactive = PETSC_TRUE;
1223 
1224   if (!mat->rowvalues && (idx || v)) {
1225     /*
1226         allocate enough space to hold information from the longest row.
1227     */
1228     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1229     int     max = 1,tmp;
1230     for (i=0; i<matin->m; i++) {
1231       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1232       if (max < tmp) { max = tmp; }
1233     }
1234     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1235     mat->rowindices = (int*)(mat->rowvalues + max);
1236   }
1237 
1238   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1239   lrow = row - rstart;
1240 
1241   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1242   if (!v)   {pvA = 0; pvB = 0;}
1243   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1244   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1245   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1246   nztot = nzA + nzB;
1247 
1248   cmap  = mat->garray;
1249   if (v  || idx) {
1250     if (nztot) {
1251       /* Sort by increasing column numbers, assuming A and B already sorted */
1252       int imark = -1;
1253       if (v) {
1254         *v = v_p = mat->rowvalues;
1255         for (i=0; i<nzB; i++) {
1256           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1257           else break;
1258         }
1259         imark = i;
1260         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1261         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1262       }
1263       if (idx) {
1264         *idx = idx_p = mat->rowindices;
1265         if (imark > -1) {
1266           for (i=0; i<imark; i++) {
1267             idx_p[i] = cmap[cworkB[i]];
1268           }
1269         } else {
1270           for (i=0; i<nzB; i++) {
1271             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1272             else break;
1273           }
1274           imark = i;
1275         }
1276         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1277         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1278       }
1279     } else {
1280       if (idx) *idx = 0;
1281       if (v)   *v   = 0;
1282     }
1283   }
1284   *nz = nztot;
1285   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1286   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
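/*
   Caller-side sketch of the row access implemented above: only locally owned
   rows may be requested, the returned column indices are global and sorted,
   and every MatGetRow() must be matched by MatRestoreRow().  The loop bounds
   come from the ownership range; the helper name is hypothetical.
*/
#if 0
static int MatGetRowUsage_Sketch(Mat M)
{
  int         ierr,rstart,rend,row,ncols,*cols;
  PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(M,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect the ncols global column indices and values of this row ... */
    ierr = MatRestoreRow(M,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
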
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1292 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1293 {
1294   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1295 
1296   PetscFunctionBegin;
1297   if (aij->getrowactive == PETSC_FALSE) {
1298     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1299   }
1300   aij->getrowactive = PETSC_FALSE;
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatNorm_MPIAIJ"
1306 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1307 {
1308   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1309   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1310   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1311   PetscReal    sum = 0.0;
1312   PetscScalar  *v;
1313 
1314   PetscFunctionBegin;
1315   if (aij->size == 1) {
1316     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1317   } else {
1318     if (type == NORM_FROBENIUS) {
1319       v = amat->a;
1320       for (i=0; i<amat->nz; i++) {
1321 #if defined(PETSC_USE_COMPLEX)
1322         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1323 #else
1324         sum += (*v)*(*v); v++;
1325 #endif
1326       }
1327       v = bmat->a;
1328       for (i=0; i<bmat->nz; i++) {
1329 #if defined(PETSC_USE_COMPLEX)
1330         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1331 #else
1332         sum += (*v)*(*v); v++;
1333 #endif
1334       }
1335       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1336       *norm = sqrt(*norm);
1337     } else if (type == NORM_1) { /* max column norm */
1338       PetscReal *tmp,*tmp2;
1339       int    *jj,*garray = aij->garray;
1340       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1341       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1342       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1343       *norm = 0.0;
1344       v = amat->a; jj = amat->j;
1345       for (j=0; j<amat->nz; j++) {
1346         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1347       }
1348       v = bmat->a; jj = bmat->j;
1349       for (j=0; j<bmat->nz; j++) {
1350         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1351       }
1352       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1353       for (j=0; j<mat->N; j++) {
1354         if (tmp2[j] > *norm) *norm = tmp2[j];
1355       }
1356       ierr = PetscFree(tmp);CHKERRQ(ierr);
1357       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1358     } else if (type == NORM_INFINITY) { /* max row norm */
1359       PetscReal ntemp = 0.0;
1360       for (j=0; j<aij->A->m; j++) {
1361         v = amat->a + amat->i[j] + shift;
1362         sum = 0.0;
1363         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1364           sum += PetscAbsScalar(*v); v++;
1365         }
1366         v = bmat->a + bmat->i[j] + shift;
1367         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1368           sum += PetscAbsScalar(*v); v++;
1369         }
1370         if (sum > ntemp) ntemp = sum;
1371       }
1372       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1373     } else {
1374       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1375     }
1376   }
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatTranspose_MPIAIJ"
1382 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1383 {
1384   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1385   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1386   int          ierr,shift = Aloc->indexshift;
1387   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1388   Mat          B;
1389   PetscScalar  *array;
1390 
1391   PetscFunctionBegin;
1392   if (!matout && M != N) {
1393     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1394   }
1395 
1396   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1397 
1398   /* copy over the A part */
1399   Aloc = (Mat_SeqAIJ*)a->A->data;
1400   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1401   row = a->rstart;
1402   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1403   for (i=0; i<m; i++) {
1404     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1405     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1406   }
1407   aj = Aloc->j;
1408   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1409 
1410   /* copy over the B part */
1411   Aloc = (Mat_SeqAIJ*)a->B->data;
1412   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1413   row  = a->rstart;
1414   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1415   ct   = cols;
1416   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1417   for (i=0; i<m; i++) {
1418     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1419     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1420   }
1421   ierr = PetscFree(ct);CHKERRQ(ierr);
1422   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1423   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1424   if (matout) {
1425     *matout = B;
1426   } else {
1427     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1434 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1435 {
1436   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1437   Mat        a = aij->A,b = aij->B;
1438   int        ierr,s1,s2,s3;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1442   if (rr) {
1443     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1444     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1445     /* Overlap communication with computation. */
1446     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1447   }
1448   if (ll) {
1449     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1450     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1451     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1452   }
1453   /* scale the diagonal block */
1454   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1455 
1456   if (rr) {
1457     /* Do a scatter end and then right scale the off-diagonal block */
1458     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1459     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1460   }
1461 
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1468 int MatPrintHelp_MPIAIJ(Mat A)
1469 {
1470   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1471   int        ierr;
1472 
1473   PetscFunctionBegin;
1474   if (!a->rank) {
1475     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1482 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1483 {
1484   PetscFunctionBegin;
1485   *bs = 1;
1486   PetscFunctionReturn(0);
1487 }
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1490 int MatSetUnfactored_MPIAIJ(Mat A)
1491 {
1492   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1493   int        ierr;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatEqual_MPIAIJ"
1502 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1503 {
1504   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1505   Mat        a,b,c,d;
1506   PetscTruth flg;
1507   int        ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1511   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1512   a = matA->A; b = matA->B;
1513   c = matB->A; d = matB->B;
1514 
1515   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1516   if (flg == PETSC_TRUE) {
1517     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1518   }
1519   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatCopy_MPIAIJ"
1525 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1526 {
1527   int        ierr;
1528   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1529   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1530   PetscTruth flg;
1531 
1532   PetscFunctionBegin;
1533   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1534   if (str != SAME_NONZERO_PATTERN || !flg) {
1535     /* because of the column compression in the off-processor part of the matrix a->B,
1536        the number of columns in a->B and b->B may differ, hence we cannot call
1537        MatCopy() directly on the two parts. If need be, a copy more efficient than
1538        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1539        and then copying the submatrices */
1540     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1541   } else {
1542     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1543     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1544   }
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1550 int MatSetUpPreallocation_MPIAIJ(Mat A)
1551 {
1552   int        ierr;
1553 
1554   PetscFunctionBegin;
1555   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1560 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1561 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1562 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1563 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1564 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1565 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1566 #endif
1567 
1568 #include "petscblaslapack.h"
1569 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatAXPY_MPIAIJ"
1572 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1573 {
1574   int        ierr,one=1,i;
1575   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1576   Mat_SeqAIJ *x,*y;
1577 
1578   PetscFunctionBegin;
1579   if (str == SAME_NONZERO_PATTERN) {
1580     x = (Mat_SeqAIJ *)xx->A->data;
1581     y = (Mat_SeqAIJ *)yy->A->data;
1582     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1583     x = (Mat_SeqAIJ *)xx->B->data;
1584     y = (Mat_SeqAIJ *)yy->B->data;
1585     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1586   } else if (str == SUBSET_NONZERO_PATTERN) {
1587     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1588 
1589     x = (Mat_SeqAIJ *)xx->B->data;
1590     y = (Mat_SeqAIJ *)yy->B->data;
1591     if (y->xtoy && y->XtoY != xx->B) {
1592       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1593       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1594     }
1595     if (!y->xtoy) { /* get xtoy */
1596       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1597       y->XtoY = xx->B;
1598     }
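    /* add x's off-diagonal nonzeros into y through the cached x-to-y index map */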
1599     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1600   } else {
1601     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1602   }
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 /* -------------------------------------------------------------------*/
1607 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1608        MatGetRow_MPIAIJ,
1609        MatRestoreRow_MPIAIJ,
1610        MatMult_MPIAIJ,
1611        MatMultAdd_MPIAIJ,
1612        MatMultTranspose_MPIAIJ,
1613        MatMultTransposeAdd_MPIAIJ,
1614        0,
1615        0,
1616        0,
1617        0,
1618        0,
1619        0,
1620        MatRelax_MPIAIJ,
1621        MatTranspose_MPIAIJ,
1622        MatGetInfo_MPIAIJ,
1623        MatEqual_MPIAIJ,
1624        MatGetDiagonal_MPIAIJ,
1625        MatDiagonalScale_MPIAIJ,
1626        MatNorm_MPIAIJ,
1627        MatAssemblyBegin_MPIAIJ,
1628        MatAssemblyEnd_MPIAIJ,
1629        0,
1630        MatSetOption_MPIAIJ,
1631        MatZeroEntries_MPIAIJ,
1632        MatZeroRows_MPIAIJ,
1633 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1634        MatLUFactorSymbolic_MPIAIJ_TFS,
1635 #else
1636        0,
1637 #endif
1638        0,
1639        0,
1640        0,
1641        MatSetUpPreallocation_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645        0,
1646        MatDuplicate_MPIAIJ,
1647        0,
1648        0,
1649        0,
1650        0,
1651        MatAXPY_MPIAIJ,
1652        MatGetSubMatrices_MPIAIJ,
1653        MatIncreaseOverlap_MPIAIJ,
1654        MatGetValues_MPIAIJ,
1655        MatCopy_MPIAIJ,
1656        MatPrintHelp_MPIAIJ,
1657        MatScale_MPIAIJ,
1658        0,
1659        0,
1660        0,
1661        MatGetBlockSize_MPIAIJ,
1662        0,
1663        0,
1664        0,
1665        0,
1666        MatFDColoringCreate_MPIAIJ,
1667        0,
1668        MatSetUnfactored_MPIAIJ,
1669        0,
1670        0,
1671        MatGetSubMatrix_MPIAIJ,
1672        MatDestroy_MPIAIJ,
1673        MatView_MPIAIJ,
1674        MatGetPetscMaps_Petsc,
1675        0,
1676        0,
1677        0,
1678        0,
1679        0,
1680        0,
1681        0,
1682        0,
1683        MatSetColoring_MPIAIJ,
1684        MatSetValuesAdic_MPIAIJ,
1685        MatSetValuesAdifor_MPIAIJ
1686 };
1687 
1688 /* ----------------------------------------------------------------------------------------*/
1689 
1690 EXTERN_C_BEGIN
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1693 int MatStoreValues_MPIAIJ(Mat mat)
1694 {
1695   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1696   int        ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1700   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 EXTERN_C_END
1704 
1705 EXTERN_C_BEGIN
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1708 int MatRetrieveValues_MPIAIJ(Mat mat)
1709 {
1710   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1711   int        ierr;
1712 
1713   PetscFunctionBegin;
1714   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1715   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 EXTERN_C_END
1719 
1720 #include "petscpc.h"
1721 EXTERN_C_BEGIN
1722 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1723 EXTERN_C_END
1724 
1725 EXTERN_C_BEGIN
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatCreate_MPIAIJ"
1728 int MatCreate_MPIAIJ(Mat B)
1729 {
1730   Mat_MPIAIJ *b;
1731   int        ierr,i,size;
1732 
1733   PetscFunctionBegin;
1734   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1735 
1736   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1737   B->data         = (void*)b;
1738   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1739   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1740   B->factor       = 0;
1741   B->assembled    = PETSC_FALSE;
1742   B->mapping      = 0;
1743 
1744   B->insertmode      = NOT_SET_VALUES;
1745   b->size            = size;
1746   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1747 
1748   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1749   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1750 
1751   /* the information in the maps duplicates the information computed below, eventually
1752      we should remove the duplicate information that is not contained in the maps */
1753   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1754   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1755 
1756   /* build local table of row and column ownerships */
1757   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1758   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1759   b->cowners = b->rowners + b->size + 2;
1760   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1761   b->rowners[0] = 0;
1762   for (i=2; i<=b->size; i++) {
1763     b->rowners[i] += b->rowners[i-1];
1764   }
1765   b->rstart = b->rowners[b->rank];
1766   b->rend   = b->rowners[b->rank+1];
1767   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1768   b->cowners[0] = 0;
1769   for (i=2; i<=b->size; i++) {
1770     b->cowners[i] += b->cowners[i-1];
1771   }
1772   b->cstart = b->cowners[b->rank];
1773   b->cend   = b->cowners[b->rank+1];
1774 
1775   /* build cache for off array entries formed */
1776   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1777   b->donotstash  = PETSC_FALSE;
1778   b->colmap      = 0;
1779   b->garray      = 0;
1780   b->roworiented = PETSC_TRUE;
1781 
1782   /* stuff used for matrix vector multiply */
1783   b->lvec      = PETSC_NULL;
1784   b->Mvctx     = PETSC_NULL;
1785 
1786   /* stuff for MatGetRow() */
1787   b->rowindices   = 0;
1788   b->rowvalues    = 0;
1789   b->getrowactive = PETSC_FALSE;
1790 
1791   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1792                                      "MatStoreValues_MPIAIJ",
1793                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1794   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1795                                      "MatRetrieveValues_MPIAIJ",
1796                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1798 				     "MatGetDiagonalBlock_MPIAIJ",
1799                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1801 				     "MatIsSymmetric_MPIAIJ",
1802 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1803   PetscFunctionReturn(0);
1804 }
1805 EXTERN_C_END
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1809 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1810 {
1811   Mat        mat;
1812   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1813   int        ierr;
1814 
1815   PetscFunctionBegin;
1816   *newmat       = 0;
1817   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1818   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1819   a    = (Mat_MPIAIJ*)mat->data;
1820   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1821   mat->factor       = matin->factor;
1822   mat->assembled    = PETSC_TRUE;
1823   mat->insertmode   = NOT_SET_VALUES;
1824   mat->preallocated = PETSC_TRUE;
1825 
1826   a->rstart       = oldmat->rstart;
1827   a->rend         = oldmat->rend;
1828   a->cstart       = oldmat->cstart;
1829   a->cend         = oldmat->cend;
1830   a->size         = oldmat->size;
1831   a->rank         = oldmat->rank;
1832   a->donotstash   = oldmat->donotstash;
1833   a->roworiented  = oldmat->roworiented;
1834   a->rowindices   = 0;
1835   a->rowvalues    = 0;
1836   a->getrowactive = PETSC_FALSE;
1837 
1838   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1839   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1840   if (oldmat->colmap) {
1841 #if defined (PETSC_USE_CTABLE)
1842     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1843 #else
1844     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1845     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1846     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1847 #endif
1848   } else a->colmap = 0;
1849   if (oldmat->garray) {
1850     int len;
1851     len  = oldmat->B->n;
1852     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1853     PetscLogObjectMemory(mat,len*sizeof(int));
1854     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1855   } else a->garray = 0;
1856 
1857   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1858   PetscLogObjectParent(mat,a->lvec);
1859   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1860   PetscLogObjectParent(mat,a->Mvctx);
1861   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1862   PetscLogObjectParent(mat,a->A);
1863   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1864   PetscLogObjectParent(mat,a->B);
1865   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1866   *newmat = mat;
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #include "petscsys.h"
1871 
1872 EXTERN_C_BEGIN
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatLoad_MPIAIJ"
1875 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1876 {
1877   Mat          A;
1878   PetscScalar  *vals,*svals;
1879   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1880   MPI_Status   status;
1881   int          i,nz,ierr,j,rstart,rend,fd;
1882   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1883   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1884   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1885 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1886   PetscTruth   flag;
1887 #endif
1888 
1889   PetscFunctionBegin;
1890   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1891   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1892   if (!rank) {
1893     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1894     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1895     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1896     if (header[3] < 0) {
1897       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1898     }
1899   }
1900 
1901   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1902   M = header[1]; N = header[2];
1903   /* determine ownership of all rows */
1904   m = M/size + ((M % size) > rank);
1905   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1906   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1907   rowners[0] = 0;
1908   for (i=2; i<=size; i++) {
1909     rowners[i] += rowners[i-1];
1910   }
1911   rstart = rowners[rank];
1912   rend   = rowners[rank+1];
1913 
1914   /* distribute row lengths to all processors */
1915   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1916   offlens = ourlens + (rend-rstart);
1917   if (!rank) {
1918     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1919     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1920     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1921     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1922     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1923     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1924   } else {
1925     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1926   }
1927 
1928   if (!rank) {
1929     /* calculate the number of nonzeros on each processor */
1930     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1931     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1932     for (i=0; i<size; i++) {
1933       for (j=rowners[i]; j< rowners[i+1]; j++) {
1934         procsnz[i] += rowlengths[j];
1935       }
1936     }
1937     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1938 
1939     /* determine max buffer needed and allocate it */
1940     maxnz = 0;
1941     for (i=0; i<size; i++) {
1942       maxnz = PetscMax(maxnz,procsnz[i]);
1943     }
1944     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1945 
1946     /* read in my part of the matrix column indices  */
1947     nz   = procsnz[0];
1948     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1949     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1950 
1951     /* read in everyone else's column indices and ship them off */
1952     for (i=1; i<size; i++) {
1953       nz   = procsnz[i];
1954       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1955       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1956     }
1957     ierr = PetscFree(cols);CHKERRQ(ierr);
1958   } else {
1959     /* determine buffer space needed for message */
1960     nz = 0;
1961     for (i=0; i<m; i++) {
1962       nz += ourlens[i];
1963     }
1964     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1965 
1966     /* receive message of column indices */
1967     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1968     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1969     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1970   }
1971 
1972   /* determine column ownership if matrix is not square */
1973   if (N != M) {
1974     n      = N/size + ((N % size) > rank);
1975     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1976     cstart = cend - n;
1977   } else {
1978     cstart = rstart;
1979     cend   = rend;
1980     n      = cend - cstart;
1981   }
1982 
1983   /* loop over local rows, determining number of off diagonal entries */
1984   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1985   jj = 0;
1986   for (i=0; i<m; i++) {
1987     for (j=0; j<ourlens[i]; j++) {
1988       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1989       jj++;
1990     }
1991   }
1992 
1993   /* create our matrix */
1994   for (i=0; i<m; i++) {
1995     ourlens[i] -= offlens[i];
1996   }
1997   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1998   A = *newmat;
1999   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2000   for (i=0; i<m; i++) {
2001     ourlens[i] += offlens[i];
2002   }
2003 
2004   if (!rank) {
2005     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2006 
2007     /* read in my part of the matrix numerical values  */
2008     nz   = procsnz[0];
2009     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2010 
2011     /* insert into matrix */
2012     jj      = rstart;
2013     smycols = mycols;
2014     svals   = vals;
2015     for (i=0; i<m; i++) {
2016       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2017       smycols += ourlens[i];
2018       svals   += ourlens[i];
2019       jj++;
2020     }
2021 
2022     /* read in the other processors' values and ship them out */
2023     for (i=1; i<size; i++) {
2024       nz   = procsnz[i];
2025       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2026       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2027     }
2028     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2029   } else {
2030     /* receive numeric values */
2031     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2032 
2033     /* receive message of values */
2034     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2035     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2036     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2037 
2038     /* insert into matrix */
2039     jj      = rstart;
2040     smycols = mycols;
2041     svals   = vals;
2042     for (i=0; i<m; i++) {
2043       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2044       smycols += ourlens[i];
2045       svals   += ourlens[i];
2046       jj++;
2047     }
2048   }
2049   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2050   ierr = PetscFree(vals);CHKERRQ(ierr);
2051   ierr = PetscFree(mycols);CHKERRQ(ierr);
2052   ierr = PetscFree(rowners);CHKERRQ(ierr);
2053 
2054   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2055   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2056 #if defined(PETSC_HAVE_SPOOLES)
2057   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2058   if (flag) {
2059     if (size == 1) {
2060       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2061     } else {
2062       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2063     }
2064   }
2065 #endif
2066 #if defined(PETSC_HAVE_SUPERLUDIST)
2067   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2068   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2069 #endif
2070 #if defined(PETSC_HAVE_MUMPS)
2071   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2072   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2073 #endif
2074   PetscFunctionReturn(0);
2075 }
2076 EXTERN_C_END
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2080 /*
2081     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2082   matrix, and then the end result by concatenating the local matrices.
2083   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2084 */
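/*
    A sketch of the reuse pattern this routine supports (illustrative only; error
  checking is omitted).  The first call must use MAT_INITIAL_MATRIX, which caches the
  sequential submatrix on the result; later calls with the same index sets and layout
  may pass MAT_REUSE_MATRIX to refill it:

      ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&newmat);
      ... change the numerical values in mat ...
      ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&newmat);
*/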
2085 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2086 {
2087   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2088   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2089   Mat          *local,M,Mreuse;
2090   PetscScalar  *vwork,*aa;
2091   MPI_Comm     comm = mat->comm;
2092   Mat_SeqAIJ   *aij;
2093 
2094 
2095   PetscFunctionBegin;
2096   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2097   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2098 
2099   if (call ==  MAT_REUSE_MATRIX) {
2100     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2101     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2102     local = &Mreuse;
2103     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2104   } else {
2105     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2106     Mreuse = *local;
2107     ierr   = PetscFree(local);CHKERRQ(ierr);
2108   }
2109 
2110   /*
2111       m - number of local rows
2112       n - number of columns (same on all processors)
2113       rstart - first row in new global matrix generated
2114   */
2115   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2116   if (call == MAT_INITIAL_MATRIX) {
2117     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2118     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2119     ii  = aij->i;
2120     jj  = aij->j;
2121 
2122     /*
2123         Determine the number of non-zeros in the diagonal and off-diagonal
2124         portions of the matrix in order to do correct preallocation
2125     */
2126 
2127     /* first get start and end of "diagonal" columns */
2128     if (csize == PETSC_DECIDE) {
2129       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2130       if (mglobal == n) { /* square matrix */
2131 	nlocal = m;
2132       } else {
2133         nlocal = n/size + ((n % size) > rank);
2134       }
2135     } else {
2136       nlocal = csize;
2137     }
2138     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2139     rstart = rend - nlocal;
2140     if (rank == size - 1 && rend != n) {
2141       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2142     }
2143 
2144     /* next, compute all the lengths */
2145     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2146     olens = dlens + m;
2147     for (i=0; i<m; i++) {
2148       jend = ii[i+1] - ii[i];
2149       olen = 0;
2150       dlen = 0;
2151       for (j=0; j<jend; j++) {
2152         if (*jj < rstart || *jj >= rend) olen++;
2153         else dlen++;
2154         jj++;
2155       }
2156       olens[i] = olen;
2157       dlens[i] = dlen;
2158     }
2159     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2160     ierr = PetscFree(dlens);CHKERRQ(ierr);
2161   } else {
2162     int ml,nl;
2163 
2164     M = *newmat;
2165     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2166     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2167     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2168     /*
2169          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2170        rather than the slower MatSetValues().
2171     */
2172     M->was_assembled = PETSC_TRUE;
2173     M->assembled     = PETSC_FALSE;
2174   }
2175   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2176   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2177   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2178   ii  = aij->i;
2179   jj  = aij->j;
2180   aa  = aij->a;
2181   for (i=0; i<m; i++) {
2182     row   = rstart + i;
2183     nz    = ii[i+1] - ii[i];
2184     cwork = jj;     jj += nz;
2185     vwork = aa;     aa += nz;
2186     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2187   }
2188 
2189   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2190   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2191   *newmat = M;
2192 
2193   /* save submatrix used in processor for next request */
2194   if (call ==  MAT_INITIAL_MATRIX) {
2195     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2196     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2197   }
2198 
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 #undef __FUNCT__
2203 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2204 /*@C
2205    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in
2206    AIJ format (the default parallel PETSc format).  For good matrix assembly
2207    performance the user should preallocate the matrix storage by setting the
2208    parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters
2209    accurately, performance can be increased by more than a factor of 50.
2210 
2211    Collective on MPI_Comm
2212 
2213    Input Parameters:
2214 +  A - the matrix
2215 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2216            (same value is used for all local rows)
2217 .  d_nnz - array containing the number of nonzeros in the various rows of the
2218            DIAGONAL portion of the local submatrix (possibly different for each row)
2219            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2220            The size of this array is equal to the number of local rows, i.e 'm'.
2221            You must leave room for the diagonal entry even if it is zero.
2222 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2223            submatrix (same value is used for all local rows).
2224 -  o_nnz - array containing the number of nonzeros in the various rows of the
2225            OFF-DIAGONAL portion of the local submatrix (possibly different for
2226            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2227            structure. The size of this array is equal to the number
2228            of local rows, i.e 'm'.
2229 
2230    The AIJ format (also called the Yale sparse matrix format or
2231    compressed row storage), is fully compatible with standard Fortran 77
2232    storage.  That is, the stored row and column indices can begin at
2233    either one (as in Fortran) or zero.  See the users manual for details.
2234 
2235    The user MUST specify either the local or global matrix dimensions
2236    (possibly both).
2237 
2238    The parallel matrix is partitioned such that the first m0 rows belong to
2239    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2240    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2241 
2242    The DIAGONAL portion of the local submatrix of a processor can be defined
2243    as the submatrix which is obtained by extracting the part corresponding
2244    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2245    first row that belongs to the processor, and r2 is the last row belonging
2246    to this processor. This is a square mxm matrix. The remaining portion of
2247    the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2248 
2249    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2250 
2251    By default, this format uses inodes (identical nodes) when possible.
2252    We search for consecutive rows with the same nonzero structure, thereby
2253    reusing matrix information to achieve increased efficiency.
2254 
2255    Options Database Keys:
2256 +  -mat_aij_no_inode  - Do not use inodes
2257 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2258 -  -mat_aij_oneindex - Internally use indexing starting at 1
2259         rather than 0.  Note that when calling MatSetValues(),
2260         the user still MUST index entries starting at 0!
2261 
2262    Example usage:
2263 
2264    Consider the following 8x8 matrix with 34 non-zero values that is
2265    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2266    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2267    as follows:
2268 
2269 .vb
2270             1  2  0  |  0  3  0  |  0  4
2271     Proc0   0  5  6  |  7  0  0  |  8  0
2272             9  0 10  | 11  0  0  | 12  0
2273     -------------------------------------
2274            13  0 14  | 15 16 17  |  0  0
2275     Proc1   0 18  0  | 19 20 21  |  0  0
2276             0  0  0  | 22 23  0  | 24  0
2277     -------------------------------------
2278     Proc2  25 26 27  |  0  0 28  | 29  0
2279            30  0  0  | 31 32 33  |  0 34
2280 .ve
2281 
2282    This can be represented as a collection of submatrices as:
2283 
2284 .vb
2285       A B C
2286       D E F
2287       G H I
2288 .ve
2289 
2290    Where the submatrices A,B,C are owned by proc0, D,E,F are
2291    owned by proc1, G,H,I are owned by proc2.
2292 
2293    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2294    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2295    The 'M','N' parameters are 8,8, and have the same values on all procs.
2296 
2297    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2298    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2299    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2300    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2301    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2302    matrix and [DF] as another SeqAIJ matrix.
2303 
2304    When d_nz, o_nz parameters are specified, d_nz storage elements are
2305    allocated for every row of the local diagonal submatrix, and o_nz
2306    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2307    One way to choose d_nz and o_nz is to use the max nonzeros per local
2308    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2309    In this case, the values of d_nz,o_nz are:
2310 .vb
2311      proc0 : dnz = 2, o_nz = 2
2312      proc1 : dnz = 3, o_nz = 2
2313      proc2 : dnz = 1, o_nz = 4
2314 .ve
2315    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2316    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2317    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2318    34 values.
2319 
2320    When d_nnz, o_nnz parameters are specified, the storage is specified
2321    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2322    In the above case the values for d_nnz,o_nnz are:
2323 .vb
2324      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2325      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2326      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2327 .ve
2328    Here the space allocated is the sum of all the above values, i.e. 34, and
2329    hence pre-allocation is perfect.
2330 
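   A typical call sequence (a sketch only; error checking with CHKERRQ() is omitted and
   the d_nnz/o_nnz arrays here are illustrative, not required):
.vb
      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);
      ierr = MatSetType(A,MATMPIAIJ);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   followed by the usual MatSetValues(), MatAssemblyBegin(), and MatAssemblyEnd() calls.
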
2331    Level: intermediate
2332 
2333 .keywords: matrix, aij, compressed row, sparse, parallel
2334 
2335 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2336 @*/
2337 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2338 {
2339   Mat_MPIAIJ   *b;
2340   int          ierr,i;
2341   PetscTruth   flg2;
2342 
2343   PetscFunctionBegin;
2344   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2345   if (!flg2) PetscFunctionReturn(0);
2346   B->preallocated = PETSC_TRUE;
2347   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2348   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2349   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2350   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2351   if (d_nnz) {
2352     for (i=0; i<B->m; i++) {
2353       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2354     }
2355   }
2356   if (o_nnz) {
2357     for (i=0; i<B->m; i++) {
2358       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2359     }
2360   }
2361   b = (Mat_MPIAIJ*)B->data;
2362 
2363   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2364   PetscLogObjectParent(B,b->A);
2365   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2366   PetscLogObjectParent(B,b->B);
2367 
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatCreateMPIAIJ"
2373 /*@C
2374    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2375    (the default parallel PETSc format).  For good matrix assembly performance
2376    the user should preallocate the matrix storage by setting the parameters
2377    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2378    performance can be increased by more than a factor of 50.
2379 
2380    Collective on MPI_Comm
2381 
2382    Input Parameters:
2383 +  comm - MPI communicator
2384 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2385            This value should be the same as the local size used in creating the
2386            y vector for the matrix-vector product y = Ax.
2387 .  n - This value should be the same as the local size used in creating the
2388        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2389        calculated if N is given). For square matrices n is almost always m.
2390 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2391 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2392 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2393            (same value is used for all local rows)
2394 .  d_nnz - array containing the number of nonzeros in the various rows of the
2395            DIAGONAL portion of the local submatrix (possibly different for each row)
2396            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2397            The size of this array is equal to the number of local rows, i.e 'm'.
2398            You must leave room for the diagonal entry even if it is zero.
2399 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2400            submatrix (same value is used for all local rows).
2401 -  o_nnz - array containing the number of nonzeros in the various rows of the
2402            OFF-DIAGONAL portion of the local submatrix (possibly different for
2403            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2404            structure. The size of this array is equal to the number
2405            of local rows, i.e 'm'.
2406 
2407    Output Parameter:
2408 .  A - the matrix
2409 
2410    Notes:
2411    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2412    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2413    storage requirements for this matrix.
2414 
2415    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2416    processor then it must be used on all processors that share the object for
2417    that argument.
2418 
2419    The AIJ format (also called the Yale sparse matrix format or
2420    compressed row storage), is fully compatible with standard Fortran 77
2421    storage.  That is, the stored row and column indices can begin at
2422    either one (as in Fortran) or zero.  See the users manual for details.
2423 
2424    The user MUST specify either the local or global matrix dimensions
2425    (possibly both).
2426 
2427    The parallel matrix is partitioned such that the first m0 rows belong to
2428    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2429    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2430 
2431    The DIAGONAL portion of the local submatrix of a processor can be defined
2432    as the submatrix which is obtained by extracting the part corresponding
2433    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2434    first row that belongs to the processor, and r2 is the last row belonging
2435    to this processor. This is a square mxm matrix. The remaining portion of
2436    the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2437 
2438    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2439 
2440    By default, this format uses inodes (identical nodes) when possible.
2441    We search for consecutive rows with the same nonzero structure, thereby
2442    reusing matrix information to achieve increased efficiency.
2443 
2444    Options Database Keys:
2445 +  -mat_aij_no_inode  - Do not use inodes
2446 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2447 -  -mat_aij_oneindex - Internally use indexing starting at 1
2448         rather than 0.  Note that when calling MatSetValues(),
2449         the user still MUST index entries starting at 0!
2450 
2451 
2452    Example usage:
2453 
2454    Consider the following 8x8 matrix with 34 non-zero values that is
2455    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2456    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2457    as follows:
2458 
2459 .vb
2460             1  2  0  |  0  3  0  |  0  4
2461     Proc0   0  5  6  |  7  0  0  |  8  0
2462             9  0 10  | 11  0  0  | 12  0
2463     -------------------------------------
2464            13  0 14  | 15 16 17  |  0  0
2465     Proc1   0 18  0  | 19 20 21  |  0  0
2466             0  0  0  | 22 23  0  | 24  0
2467     -------------------------------------
2468     Proc2  25 26 27  |  0  0 28  | 29  0
2469            30  0  0  | 31 32 33  |  0 34
2470 .ve
2471 
2472    This can be represented as a collection of submatrices as:
2473 
2474 .vb
2475       A B C
2476       D E F
2477       G H I
2478 .ve
2479 
2480    Where the submatrices A,B,C are owned by proc0, D,E,F are
2481    owned by proc1, G,H,I are owned by proc2.
2482 
2483    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2484    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2485    The 'M','N' parameters are 8,8, and have the same values on all procs.
2486 
2487    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2488    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2489    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2490    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2491    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2492    matrix and [DF] as another SeqAIJ matrix.
2493 
2494    When d_nz, o_nz parameters are specified, d_nz storage elements are
2495    allocated for every row of the local diagonal submatrix, and o_nz
2496    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2497    One way to choose d_nz and o_nz is to use the max nonzeros per local
2498    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2499    In this case, the values of d_nz,o_nz are:
2500 .vb
2501      proc0 : dnz = 2, o_nz = 2
2502      proc1 : dnz = 3, o_nz = 2
2503      proc2 : dnz = 1, o_nz = 4
2504 .ve
2505    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2506    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2507    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2508    34 values.
2509 
2510    When d_nnz, o_nnz parameters are specified, the storage is specified
2511    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2512    In the above case the values for d_nnz,o_nnz are:
2513 .vb
2514      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2515      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2516      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2517 .ve
2518    Here the space allocated is the sum of all the above values, i.e. 34, and
2519    hence pre-allocation is perfect.
2520 
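   A call matching the example above (a sketch only; error checking is omitted and the
   d_nnz/o_nnz arrays hold the per-process values listed above):
.vb
      Mat A;
      on proc0 and proc1:  ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
      on proc2:            ierr = MatCreateMPIAIJ(comm,2,2,8,8,0,d_nnz,0,o_nnz,&A);
.ve
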
2521    Level: intermediate
2522 
2523 .keywords: matrix, aij, compressed row, sparse, parallel
2524 
2525 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2526 @*/
2527 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2528 {
2529   int ierr,size;
2530 
2531   PetscFunctionBegin;
2532   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2534   if (size > 1) {
2535     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2536     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2537   } else {
2538     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2539     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2540   }
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2546 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2547 {
2548   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2549   PetscFunctionBegin;
2550   *Ad     = a->A;
2551   *Ao     = a->B;
2552   *colmap = a->garray;
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2558 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2559 {
2560   int        ierr,i;
2561   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2562 
2563   PetscFunctionBegin;
2564   if (coloring->ctype == IS_COLORING_LOCAL) {
2565     ISColoringValue *allcolors,*colors;
2566     ISColoring      ocoloring;
2567 
2568     /* set coloring for diagonal portion */
2569     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2570 
2571     /* set coloring for off-diagonal portion */
2572     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2573     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2574     for (i=0; i<a->B->n; i++) {
2575       colors[i] = allcolors[a->garray[i]];
2576     }
2577     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2578     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2579     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2580     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2581   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2582     ISColoringValue *colors;
2583     int             *larray;
2584     ISColoring      ocoloring;
2585 
2586     /* set coloring for diagonal portion */
2587     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2588     for (i=0; i<a->A->n; i++) {
2589       larray[i] = i + a->cstart;
2590     }
2591     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2592     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2593     for (i=0; i<a->A->n; i++) {
2594       colors[i] = coloring->colors[larray[i]];
2595     }
2596     ierr = PetscFree(larray);CHKERRQ(ierr);
2597     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2598     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2599     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2600 
2601     /* set coloring for off-diagonal portion */
2602     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2603     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2604     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2605     for (i=0; i<a->B->n; i++) {
2606       colors[i] = coloring->colors[larray[i]];
2607     }
2608     ierr = PetscFree(larray);CHKERRQ(ierr);
2609     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2610     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2611     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2612   } else {
2613     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2614   }
2615 
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2621 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2622 {
2623   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2624   int        ierr;
2625 
2626   PetscFunctionBegin;
2627   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2628   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 #undef __FUNCT__
2633 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2634 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2635 {
2636   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2637   int        ierr;
2638 
2639   PetscFunctionBegin;
2640   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2641   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 #undef __FUNCT__
2646 #define __FUNCT__ "MatMerge"
2647 /*@C
2648       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2649                  matrices from each processor
2650 
2651     Collective on MPI_Comm
2652 
2653    Input Parameters:
2654 +    comm - the communicator the parallel matrix will live on
2655 -    inmat - the input sequential matrix (one per processor)
2656 
2657    Output Parameter:
2658 .    outmat - the parallel matrix generated
2659 
2660     Level: advanced
2661 
2662    Notes: The number of columns of the matrix on EACH of the separate processors
2663       MUST be the same. The input matrix inmat is destroyed by this routine.
2664 
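   Example usage (a sketch only; error checking is omitted, and seqmat must be filled
   with MatSetValues() and assembled before the merge):
.vb
      Mat seqmat,parmat;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,nnz,&seqmat);
      ...  fill and assemble seqmat  ...
      ierr = MatMerge(PETSC_COMM_WORLD,seqmat,&parmat);
.ve
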
2665 @*/
2666 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2667 {
2668   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2669   PetscScalar *values;
2670   PetscMap    columnmap,rowmap;
2671 
2672   PetscFunctionBegin;
2673 
2674   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2675 
2676   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2677   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2678   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2679   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2680   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2681   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2682 
2683   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2684   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2685   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2686   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2687   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2688 
2689   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2690   for (i=0;i<m;i++) {
2691     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2692     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2693     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2694   }
2695   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2696   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2697 
2698   for (i=0;i<m;i++) {
2699     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2700     I    = i + rstart;
2701     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2702     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2703   }
2704   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2705   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2707 
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatFileSplit"
2713 int MatFileSplit(Mat A,char *outfile)
2714 {
2715   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2716   PetscViewer out;
2717   char        *name;
2718   Mat         B;
2719   PetscScalar *values;
2720 
2721   PetscFunctionBegin;
2722 
2723   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2724   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2725   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2726   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2727   for (i=0;i<m;i++) {
2728     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2729     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2730     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2731   }
2732   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2733   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2734 
2735   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2736   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2737   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2738   sprintf(name,"%s.%d",outfile,rank);
2739   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2740   ierr = PetscFree(name);CHKERRQ(ierr);
2741   ierr = MatView(B,out);CHKERRQ(ierr);
2742   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2743   ierr = MatDestroy(B);CHKERRQ(ierr);
2744   PetscFunctionReturn(0);
2745 }
2746