xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 829b6ff0376e8d5b8b7c6c54aef4e99b13aa00d1)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
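/*
   A minimal usage sketch (hypothetical driver code, not part of this file; the names
   gseq and mdist are chosen here only for illustration). gmat is accessed only on
   process 0, m is the number of local rows this process should own, and the local
   row counts must add up to the global size of gseq:

     Mat gseq = PETSC_NULL,mdist;
     if (!rank) {  ...assemble the whole matrix as a SeqAIJ in gseq on process 0...  }
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&mdist);CHKERRQ(ierr);
     ...later, change the numerical values (but not the nonzero pattern) of gseq on process 0...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&mdist);CHKERRQ(ierr);
*/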
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
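    /* rowners[1..size] now holds each process's local row count; the running sum below
       turns it into ownership ranges, i.e. rowners[p] is the first global row owned by
       process p and rowners[size] is the global number of rows */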
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the diagonal and off-diagonal nonzero counts */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
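      /* ld[i] counts the nonzeros of local row i whose global column lies to the left of
         the diagonal block (needed later when the matrix is reused), while olens[i] counts
         all nonzeros outside the diagonal block [rstart,rend) */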
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the diagonal and off-diagonal nonzero counts */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
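    /* the incoming values for each local row are ordered by global column: the part of the
       off-diagonal block left of the diagonal block, then the diagonal block, then the
       off-diagonal part to its right; ld[i] records how many entries of row i fall in the
       leftmost piece so the stream can be split between the ao (B) and ad (A) arrays */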
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
179 a slightly higher hash-table cost; without it, it is not scalable (each process
180 stores an order-N integer array) but is fast to access.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
206 #define CHUNKSIZE   15
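/*
   The two macros below insert a single (row,col,value) entry into the diagonal (A) or
   off-diagonal (B) sequential block: a short binary search narrows the column position,
   an existing entry is added to or overwritten, and otherwise (when the nonew flag
   permits it) the row is reallocated if needed and later entries are shifted up to make
   room for the new nonzero.
*/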
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some variables required by the macros below */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   if (v) PetscValidScalarPointer(v,6);
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any process has disassembled; if so we must
517      also disassemble ourselves so that we may reassemble. */
518   /*
519      if the nonzero structure of the submatrix B cannot change then we know that
520      no process disassembled, so we can skip this step
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
544   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
551 {
552   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
557   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatZeroRows_MPIAIJ"
563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
564 {
565   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
568   PetscInt       i,*owners = A->rmap->range;
569   PetscInt       *nprocs,j,idx,nsends,row;
570   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
571   PetscInt       *rvalues,count,base,slen,*source;
572   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
573   MPI_Comm       comm = ((PetscObject)A)->comm;
574   MPI_Request    *send_waits,*recv_waits;
575   MPI_Status     recv_status,*send_status;
576 #if defined(PETSC_DEBUG)
577   PetscTruth     found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
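  /* Rows to be zeroed may be owned by other processes, so the global row indices are
     first routed to their owners: count how many indices go to each process, exchange
     message counts and maximum lengths with PetscMaxSum(), then move the indices with
     nonblocking sends and receives before zeroing the locally owned rows below. */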
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   /* inform other processors of number of messages and max length*/
608   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
609 
610   /* post receives:   */
611   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
612   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
613   for (i=0; i<nrecvs; i++) {
614     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
615   }
616 
617   /* do sends:
618       1) starts[i] gives the starting index in svalues for stuff going to
619          the ith processor
620   */
621   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
622   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
623   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
624   starts[0] = 0;
625   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
626   for (i=0; i<N; i++) {
627     svalues[starts[owner[i]]++] = rows[i];
628   }
629 
630   starts[0] = 0;
631   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   count = 0;
633   for (i=0; i<size; i++) {
634     if (nprocs[2*i+1]) {
635       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
636     }
637   }
638   ierr = PetscFree(starts);CHKERRQ(ierr);
639 
640   base = owners[rank];
641 
642   /*  wait on receives */
643   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
644   source = lens + nrecvs;
645   count  = nrecvs; slen = 0;
646   while (count) {
647     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
648     /* unpack receives into our local space */
649     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
650     source[imdex]  = recv_status.MPI_SOURCE;
651     lens[imdex]    = n;
652     slen          += n;
653     count--;
654   }
655   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
656 
657   /* move the data into the send scatter */
658   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
659   count = 0;
660   for (i=0; i<nrecvs; i++) {
661     values = rvalues + i*nmax;
662     for (j=0; j<lens[i]; j++) {
663       lrows[count++] = values[j] - base;
664     }
665   }
666   ierr = PetscFree(rvalues);CHKERRQ(ierr);
667   ierr = PetscFree(lens);CHKERRQ(ierr);
668   ierr = PetscFree(owner);CHKERRQ(ierr);
669   ierr = PetscFree(nprocs);CHKERRQ(ierr);
670 
671   /* actually zap the local rows */
672   /*
673         Zero the required rows. If the "diagonal block" of the matrix
674      is square and the user wishes to set the diagonal, we use separate
675      code so that MatSetValues() is not called for each diagonal entry,
676      which would allocate new memory with many mallocs and slow things down.
677 
678   */
679   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
680   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
681   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
682     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
683   } else if (diag != 0.0) {
684     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
685     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
686       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
687 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
688     }
689     for (i = 0; i < slen; i++) {
690       row  = lrows[i] + rstart;
691       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
692     }
693     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
695   } else {
696     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
697   }
698   ierr = PetscFree(lrows);CHKERRQ(ierr);
699 
700   /* wait on sends */
701   if (nsends) {
702     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
703     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
704     ierr = PetscFree(send_status);CHKERRQ(ierr);
705   }
706   ierr = PetscFree(send_waits);CHKERRQ(ierr);
707   ierr = PetscFree(svalues);CHKERRQ(ierr);
708 
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatMult_MPIAIJ"
714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
715 {
716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode ierr;
718   PetscInt       nt;
719 
720   PetscFunctionBegin;
721   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
722   if (nt != A->cmap->n) {
723     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
724   }
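  /* overlap communication with computation: start scattering the needed ghost values of
     xx, multiply by the diagonal block while the messages are in flight, then complete
     the scatter and add in the off-diagonal block contribution */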
725   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
727   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
734 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
735 {
736   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatMultAdd_MPIAIJ"
746 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
747 {
748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
753   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
754   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
755   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
761 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
762 {
763   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765   PetscTruth     merged;
766 
767   PetscFunctionBegin;
768   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
769   /* do nondiagonal part */
770   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
771   if (!merged) {
772     /* send it on its way */
773     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* receive remote parts: note this assumes the values are not actually */
777     /* added into yy until the VecScatterEnd() below */
778     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
779   } else {
780     /* do local part */
781     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
782     /* send it on its way */
783     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
784     /* values were actually received in the Begin() but we still need to call this no-op */
785     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 EXTERN_C_BEGIN
791 #undef __FUNCT__
792 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
793 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
794 {
795   MPI_Comm       comm;
796   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
797   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
798   IS             Me,Notme;
799   PetscErrorCode ierr;
800   PetscInt       M,N,first,last,*notme,i;
801   PetscMPIInt    size;
802 
803   PetscFunctionBegin;
804 
805   /* Easy test: symmetric diagonal block */
806   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
807   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
808   if (!*f) PetscFunctionReturn(0);
809   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
810   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
811   if (size == 1) PetscFunctionReturn(0);
812 
813   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
814   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
815   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
816   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
817   for (i=0; i<first; i++) notme[i] = i;
818   for (i=last; i<M; i++) notme[i-last+first] = i;
819   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
820   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
821   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
822   Aoff = Aoffs[0];
823   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
824   Boff = Boffs[0];
825   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
828   ierr = ISDestroy(Me);CHKERRQ(ierr);
829   ierr = ISDestroy(Notme);CHKERRQ(ierr);
830 
831   PetscFunctionReturn(0);
832 }
833 EXTERN_C_END
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
837 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
838 {
839   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
840   PetscErrorCode ierr;
841 
842   PetscFunctionBegin;
843   /* do nondiagonal part */
844   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
845   /* send it on its way */
846   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
847   /* do local part */
848   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
849   /* receive remote parts */
850   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 /*
855   This only works correctly for square matrices where the subblock A->A is the
856    diagonal block
857 */
858 #undef __FUNCT__
859 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
860 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
861 {
862   PetscErrorCode ierr;
863   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
864 
865   PetscFunctionBegin;
866   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
867   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
868     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   }
870   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatScale_MPIAIJ"
876 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
877 {
878   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
883   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatDestroy_MPIAIJ"
889 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
890 {
891   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
892   PetscErrorCode ierr;
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_LOG)
896   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
897 #endif
898   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
899   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
900   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
901   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
902 #if defined (PETSC_USE_CTABLE)
903   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
904 #else
905   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
906 #endif
907   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
908   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
909   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
910   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
911   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
912   ierr = PetscFree(aij);CHKERRQ(ierr);
913 
914   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
922   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
923   PetscFunctionReturn(0);
924 }
925 
926 #undef __FUNCT__
927 #define __FUNCT__ "MatView_MPIAIJ_Binary"
928 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
929 {
930   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
931   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
932   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
933   PetscErrorCode    ierr;
934   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
935   int               fd;
936   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
937   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
938   PetscScalar       *column_values;
939 
940   PetscFunctionBegin;
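  /* The file is written entirely by process 0: a four-entry header (cookie, M, N, total
     number of nonzeros), then all row lengths, then all global column indices, then all
     numerical values; every other process sends its local piece to process 0 in turn. */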
941   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
942   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
943   nz   = A->nz + B->nz;
944   if (!rank) {
945     header[0] = MAT_FILE_COOKIE;
946     header[1] = mat->rmap->N;
947     header[2] = mat->cmap->N;
948     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
949     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
950     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
951     /* get largest number of rows any processor has */
952     rlen = mat->rmap->n;
953     range = mat->rmap->range;
954     for (i=1; i<size; i++) {
955       rlen = PetscMax(rlen,range[i+1] - range[i]);
956     }
957   } else {
958     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
959     rlen = mat->rmap->n;
960   }
961 
962   /* load up the local row counts */
963   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
964   for (i=0; i<mat->rmap->n; i++) {
965     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
966   }
967 
968   /* store the row lengths to the file */
969   if (!rank) {
970     MPI_Status status;
971     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
972     for (i=1; i<size; i++) {
973       rlen = range[i+1] - range[i];
974       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
975       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
976     }
977   } else {
978     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
979   }
980   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
981 
982   /* load up the local column indices */
983   nzmax = nz; /* process 0 needs as much space as the largest process needs */
984   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
985   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
986   cnt  = 0;
987   for (i=0; i<mat->rmap->n; i++) {
988     for (j=B->i[i]; j<B->i[i+1]; j++) {
989       if ( (col = garray[B->j[j]]) > cstart) break;
990       column_indices[cnt++] = col;
991     }
992     for (k=A->i[i]; k<A->i[i+1]; k++) {
993       column_indices[cnt++] = A->j[k] + cstart;
994     }
995     for (; j<B->i[i+1]; j++) {
996       column_indices[cnt++] = garray[B->j[j]];
997     }
998   }
999   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1000 
1001   /* store the column indices to the file */
1002   if (!rank) {
1003     MPI_Status status;
1004     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1005     for (i=1; i<size; i++) {
1006       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1007       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1008       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1009       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1010     }
1011   } else {
1012     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1014   }
1015   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1016 
1017   /* load up the local column values */
1018   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1019   cnt  = 0;
1020   for (i=0; i<mat->rmap->n; i++) {
1021     for (j=B->i[i]; j<B->i[i+1]; j++) {
1022       if ( garray[B->j[j]] > cstart) break;
1023       column_values[cnt++] = B->a[j];
1024     }
1025     for (k=A->i[i]; k<A->i[i+1]; k++) {
1026       column_values[cnt++] = A->a[k];
1027     }
1028     for (; j<B->i[i+1]; j++) {
1029       column_values[cnt++] = B->a[j];
1030     }
1031   }
1032   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1033 
1034   /* store the column values to the file */
1035   if (!rank) {
1036     MPI_Status status;
1037     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1038     for (i=1; i<size; i++) {
1039       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1040       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1041       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1042       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1043     }
1044   } else {
1045     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1047   }
1048   ierr = PetscFree(column_values);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1054 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1055 {
1056   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1057   PetscErrorCode    ierr;
1058   PetscMPIInt       rank = aij->rank,size = aij->size;
1059   PetscTruth        isdraw,iascii,isbinary;
1060   PetscViewer       sviewer;
1061   PetscViewerFormat format;
1062 
1063   PetscFunctionBegin;
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1066   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1067   if (iascii) {
1068     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1069     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1070       MatInfo    info;
1071       PetscTruth inodes;
1072 
1073       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1074       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1075       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1076       if (!inodes) {
1077         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1078 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1079       } else {
1080         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1081 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1082       }
1083       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1085       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1087       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1088       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1089       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1090       PetscFunctionReturn(0);
1091     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1092       PetscInt   inodecount,inodelimit,*inodes;
1093       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1094       if (inodes) {
1095         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1096       } else {
1097         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1098       }
1099       PetscFunctionReturn(0);
1100     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1101       PetscFunctionReturn(0);
1102     }
1103   } else if (isbinary) {
1104     if (size == 1) {
1105       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1106       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1107     } else {
1108       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1109     }
1110     PetscFunctionReturn(0);
1111   } else if (isdraw) {
1112     PetscDraw  draw;
1113     PetscTruth isnull;
1114     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1115     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1116   }
1117 
1118   if (size == 1) {
1119     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1120     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1121   } else {
1122     /* assemble the entire matrix onto first processor. */
1123     Mat         A;
1124     Mat_SeqAIJ  *Aloc;
1125     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1126     MatScalar   *a;
1127 
1128     if (mat->rmap->N > 1024) {
1129       PetscTruth flg = PETSC_FALSE;
1130 
1131       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1132       if (!flg) {
1133         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1134       }
1135     }
1136 
1137     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1138     if (!rank) {
1139       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1140     } else {
1141       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1142     }
1143     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1144     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1145     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1146     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1147 
1148     /* copy over the A part */
1149     Aloc = (Mat_SeqAIJ*)aij->A->data;
1150     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1151     row = mat->rmap->rstart;
1152     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1153     for (i=0; i<m; i++) {
1154       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1155       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1156     }
1157     aj = Aloc->j;
1158     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1159 
1160     /* copy over the B part */
1161     Aloc = (Mat_SeqAIJ*)aij->B->data;
1162     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1163     row  = mat->rmap->rstart;
1164     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1165     ct   = cols;
1166     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1167     for (i=0; i<m; i++) {
1168       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1169       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1170     }
1171     ierr = PetscFree(ct);CHKERRQ(ierr);
1172     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1174     /*
1175        Everyone has to participate in this call to draw the matrix since the graphics waits are
1176        synchronized across all processes that share the PetscDraw object
1177     */
1178     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1179     if (!rank) {
1180       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1181       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1182     }
1183     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1184     ierr = MatDestroy(A);CHKERRQ(ierr);
1185   }
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatView_MPIAIJ"
1191 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1192 {
1193   PetscErrorCode ierr;
1194   PetscTruth     iascii,isdraw,issocket,isbinary;
1195 
1196   PetscFunctionBegin;
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1198   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1201   if (iascii || isdraw || isbinary || issocket) {
1202     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1203   } else {
1204     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1205   }
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatSOR_MPIAIJ"
1211 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1212 {
1213   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1214   PetscErrorCode ierr;
1215   Vec            bb1 = 0;
1216   PetscTruth     hasop;
1217 
1218   PetscFunctionBegin;
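  /* Only processor-local relaxation is supported: each outer iteration scatters the
     current xx into lvec, folds the off-diagonal contribution into a modified right-hand
     side bb1 = bb - B*lvec, and then runs the sequential SOR kernel of the diagonal
     block A on bb1 (the SOR_EISENSTAT branch uses the same pieces in a different order). */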
1219   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1220     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1221   }
1222 
1223   if (flag == SOR_APPLY_UPPER) {
1224     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1225     PetscFunctionReturn(0);
1226   }
1227 
1228   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1229     if (flag & SOR_ZERO_INITIAL_GUESS) {
1230       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1231       its--;
1232     }
1233 
1234     while (its--) {
1235       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237 
1238       /* update rhs: bb1 = bb - B*x */
1239       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1240       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1241 
1242       /* local sweep */
1243       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1244     }
1245   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1246     if (flag & SOR_ZERO_INITIAL_GUESS) {
1247       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1248       its--;
1249     }
1250     while (its--) {
1251       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253 
1254       /* update rhs: bb1 = bb - B*x */
1255       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1256       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1257 
1258       /* local sweep */
1259       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1260     }
1261   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1262     if (flag & SOR_ZERO_INITIAL_GUESS) {
1263       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1264       its--;
1265     }
1266     while (its--) {
1267       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269 
1270       /* update rhs: bb1 = bb - B*x */
1271       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1272       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1273 
1274       /* local sweep */
1275       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1276     }
1277   }  else if (flag & SOR_EISENSTAT) {
1278     Vec         xx1;
1279 
1280     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1281     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1282 
1283     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1285     if (!mat->diag) {
1286       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1287       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1288     }
1289     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1290     if (hasop) {
1291       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1292     } else {
1293       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1294     }
1295     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1296 
1297     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1298 
1299     /* local sweep */
1300     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1301     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1302     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1303   } else {
1304     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1305   }
1306 
1307   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatPermute_MPIAIJ"
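/*
    Permutes the rows and columns of a parallel AIJ matrix.  The row permutation is gathered
    onto every process and inverted, so each process can identify the global rows it will own;
    the column permutation is handled locally.  The permuted matrix is then extracted with
    MatGetSubMatrix_MPIAIJ_Private().
*/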
1313 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1314 {
1315   MPI_Comm       comm,pcomm;
1316   PetscInt       first,local_size,nrows;
1317   const PetscInt *rows;
1318   PetscMPIInt    size;
1319   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1324   /* make a collective version of 'rowp' */
1325   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1326   if (pcomm==comm) {
1327     crowp = rowp;
1328   } else {
1329     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1330     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1331     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1332     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1333   }
1334   /* collect the global row permutation and invert it */
1335   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1336   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1337   if (pcomm!=comm) {
1338     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1339   }
1340   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1341   /* get the local target indices */
1342   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1344   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1345   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1346   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1347   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1348   /* the column permutation is simpler:
1349      make a local version of 'colp' and invert it */
1350   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1351   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1352   if (size==1) {
1353     lcolp = colp;
1354   } else {
1355     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1356     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1357     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1358   }
1359   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1360   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1361   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1362   if (size>1) {
1363     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1364     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1365   }
1366   /* now we just get the submatrix */
1367   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1368   /* clean up */
1369   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1370   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1376 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1377 {
1378   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1379   Mat            A = mat->A,B = mat->B;
1380   PetscErrorCode ierr;
1381   PetscReal      isend[5],irecv[5];
1382 
1383   PetscFunctionBegin;
1384   info->block_size     = 1.0;
1385   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1386   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1387   isend[3] = info->memory;  isend[4] = info->mallocs;
1388   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1389   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1390   isend[3] += info->memory;  isend[4] += info->mallocs;
1391   if (flag == MAT_LOCAL) {
1392     info->nz_used      = isend[0];
1393     info->nz_allocated = isend[1];
1394     info->nz_unneeded  = isend[2];
1395     info->memory       = isend[3];
1396     info->mallocs      = isend[4];
1397   } else if (flag == MAT_GLOBAL_MAX) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   } else if (flag == MAT_GLOBAL_SUM) {
1405     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1406     info->nz_used      = irecv[0];
1407     info->nz_allocated = irecv[1];
1408     info->nz_unneeded  = irecv[2];
1409     info->memory       = irecv[3];
1410     info->mallocs      = irecv[4];
1411   }
1412   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1413   info->fill_ratio_needed = 0;
1414   info->factor_mallocs    = 0;
1415 
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatSetOption_MPIAIJ"
1421 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1422 {
1423   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   switch (op) {
1428   case MAT_NEW_NONZERO_LOCATIONS:
1429   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1430   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1431   case MAT_KEEP_NONZERO_PATTERN:
1432   case MAT_NEW_NONZERO_LOCATION_ERR:
1433   case MAT_USE_INODES:
1434   case MAT_IGNORE_ZERO_ENTRIES:
1435     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1436     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1437     break;
1438   case MAT_ROW_ORIENTED:
1439     a->roworiented = flg;
1440     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1441     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1442     break;
1443   case MAT_NEW_DIAGONALS:
1444     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1445     break;
1446   case MAT_IGNORE_OFF_PROC_ENTRIES:
1447     a->donotstash = PETSC_TRUE;
1448     break;
1449   case MAT_SYMMETRIC:
1450     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1451     break;
1452   case MAT_STRUCTURALLY_SYMMETRIC:
1453     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1454     break;
1455   case MAT_HERMITIAN:
1456     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1457     break;
1458   case MAT_SYMMETRY_ETERNAL:
1459     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1460     break;
1461   default:
1462     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1463   }
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatGetRow_MPIAIJ"
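/*
    Returns one locally owned row of the parallel matrix with global column indices, merging the
    diagonal (A) and off-diagonal (B) pieces of the row in increasing column order.  Workspace
    large enough for the longest local row is allocated on the first call and reused afterwards.
*/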
1469 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1470 {
1471   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1472   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1473   PetscErrorCode ierr;
1474   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1475   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1476   PetscInt       *cmap,*idx_p;
1477 
1478   PetscFunctionBegin;
1479   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1480   mat->getrowactive = PETSC_TRUE;
1481 
1482   if (!mat->rowvalues && (idx || v)) {
1483     /*
1484         allocate enough space to hold information from the longest row.
1485     */
1486     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1487     PetscInt     max = 1,tmp;
1488     for (i=0; i<matin->rmap->n; i++) {
1489       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1490       if (max < tmp) { max = tmp; }
1491     }
1492     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1493     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1494   }
1495 
1496   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1497   lrow = row - rstart;
1498 
1499   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1500   if (!v)   {pvA = 0; pvB = 0;}
1501   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1502   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   nztot = nzA + nzB;
1505 
1506   cmap  = mat->garray;
1507   if (v  || idx) {
1508     if (nztot) {
1509       /* Sort by increasing column numbers, assuming A and B already sorted */
1510       PetscInt imark = -1;
1511       if (v) {
1512         *v = v_p = mat->rowvalues;
1513         for (i=0; i<nzB; i++) {
1514           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1515           else break;
1516         }
1517         imark = i;
1518         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1519         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1520       }
1521       if (idx) {
1522         *idx = idx_p = mat->rowindices;
1523         if (imark > -1) {
1524           for (i=0; i<imark; i++) {
1525             idx_p[i] = cmap[cworkB[i]];
1526           }
1527         } else {
1528           for (i=0; i<nzB; i++) {
1529             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1530             else break;
1531           }
1532           imark = i;
1533         }
1534         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1535         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1536       }
1537     } else {
1538       if (idx) *idx = 0;
1539       if (v)   *v   = 0;
1540     }
1541   }
1542   *nz = nztot;
1543   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1544   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1550 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551 {
1552   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1553 
1554   PetscFunctionBegin;
1555   if (!aij->getrowactive) {
1556     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1557   }
1558   aij->getrowactive = PETSC_FALSE;
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatNorm_MPIAIJ"
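/*
    Norms are assembled from the local A (diagonal) and B (off-diagonal) parts: the Frobenius
    norm sums the squares of the local entries and reduces with MPI_SUM, the 1-norm is the
    maximum global column sum, and the infinity norm is the maximum local row sum reduced with
    MPI_MAX.  The 2-norm is not supported in parallel.
*/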
1564 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1565 {
1566   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1567   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1568   PetscErrorCode ierr;
1569   PetscInt       i,j,cstart = mat->cmap->rstart;
1570   PetscReal      sum = 0.0;
1571   MatScalar      *v;
1572 
1573   PetscFunctionBegin;
1574   if (aij->size == 1) {
1575     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1576   } else {
1577     if (type == NORM_FROBENIUS) {
1578       v = amat->a;
1579       for (i=0; i<amat->nz; i++) {
1580 #if defined(PETSC_USE_COMPLEX)
1581         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1582 #else
1583         sum += (*v)*(*v); v++;
1584 #endif
1585       }
1586       v = bmat->a;
1587       for (i=0; i<bmat->nz; i++) {
1588 #if defined(PETSC_USE_COMPLEX)
1589         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1590 #else
1591         sum += (*v)*(*v); v++;
1592 #endif
1593       }
1594       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1595       *norm = sqrt(*norm);
1596     } else if (type == NORM_1) { /* max column norm */
1597       PetscReal *tmp,*tmp2;
1598       PetscInt  *jj,*garray = aij->garray;
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1600       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1601       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1602       *norm = 0.0;
1603       v = amat->a; jj = amat->j;
1604       for (j=0; j<amat->nz; j++) {
1605         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1606       }
1607       v = bmat->a; jj = bmat->j;
1608       for (j=0; j<bmat->nz; j++) {
1609         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1610       }
1611       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1612       for (j=0; j<mat->cmap->N; j++) {
1613         if (tmp2[j] > *norm) *norm = tmp2[j];
1614       }
1615       ierr = PetscFree(tmp);CHKERRQ(ierr);
1616       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1617     } else if (type == NORM_INFINITY) { /* max row norm */
1618       PetscReal ntemp = 0.0;
1619       for (j=0; j<aij->A->rmap->n; j++) {
1620         v = amat->a + amat->i[j];
1621         sum = 0.0;
1622         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1623           sum += PetscAbsScalar(*v); v++;
1624         }
1625         v = bmat->a + bmat->i[j];
1626         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1627           sum += PetscAbsScalar(*v); v++;
1628         }
1629         if (sum > ntemp) ntemp = sum;
1630       }
1631       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1632     } else {
1633       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1634     }
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatTranspose_MPIAIJ"
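/*
    Forms the transpose by inserting each local row of A (diagonal and off-diagonal parts) as a
    column of the result with MatSetValues(), so off-process entries travel through the usual
    stash.  Preallocation uses the column counts of the local diagonal part as d_nnz and, as
    noted below, reuses the same counts as a rough estimate for o_nnz.
*/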
1641 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1642 {
1643   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1644   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1645   PetscErrorCode ierr;
1646   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1647   PetscInt       cstart=A->cmap->rstart,ncol;
1648   Mat            B;
1649   MatScalar      *array;
1650 
1651   PetscFunctionBegin;
1652   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1653 
1654   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1655   ai = Aloc->i; aj = Aloc->j;
1656   bi = Bloc->i; bj = Bloc->j;
1657   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1658     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1659     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1660     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1661     for (i=0; i<ai[ma]; i++){
1662       d_nnz[aj[i]] ++;
1663       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1664     }
1665 
1666     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1667     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1668     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1669     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1670     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1671   } else {
1672     B = *matout;
1673   }
1674 
1675   /* copy over the A part */
1676   array = Aloc->a;
1677   row = A->rmap->rstart;
1678   for (i=0; i<ma; i++) {
1679     ncol = ai[i+1]-ai[i];
1680     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1681     row++; array += ncol; aj += ncol;
1682   }
1683   aj = Aloc->j;
1684   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1685 
1686   /* copy over the B part */
1687   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1688   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1689   array = Bloc->a;
1690   row = A->rmap->rstart;
1691   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1692   cols_tmp = cols;
1693   for (i=0; i<mb; i++) {
1694     ncol = bi[i+1]-bi[i];
1695     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1696     row++; array += ncol; cols_tmp += ncol;
1697   }
1698   ierr = PetscFree(cols);CHKERRQ(ierr);
1699 
1700   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1703     *matout = B;
1704   } else {
1705     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1706   }
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1712 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1713 {
1714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1715   Mat            a = aij->A,b = aij->B;
1716   PetscErrorCode ierr;
1717   PetscInt       s1,s2,s3;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1721   if (rr) {
1722     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1723     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1724     /* Overlap communication with computation. */
1725     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726   }
1727   if (ll) {
1728     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1729     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1730     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1731   }
1732   /* scale the diagonal block */
1733   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1734 
1735   if (rr) {
1736     /* Do a scatter end and then right scale the off-diagonal block */
1737     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1738     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1739   }
1740 
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1746 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1747 {
1748   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1753   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1755   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1760 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1761 {
1762   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1763   PetscErrorCode ierr;
1764 
1765   PetscFunctionBegin;
1766   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatEqual_MPIAIJ"
1772 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1773 {
1774   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1775   Mat            a,b,c,d;
1776   PetscTruth     flg;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   a = matA->A; b = matA->B;
1781   c = matB->A; d = matB->B;
1782 
1783   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1784   if (flg) {
1785     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1786   }
1787   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatCopy_MPIAIJ"
1793 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1794 {
1795   PetscErrorCode ierr;
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1797   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1798 
1799   PetscFunctionBegin;
1800   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1801   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1802     /* Because of the column compression in the off-process part of the matrix a->B,
1803        the number of columns in a->B and b->B may differ, so MatCopy() cannot be
1804        called directly on the two parts.  If needed, a copy more efficient than
1805        MatCopy_Basic() could be provided by first uncompressing the B matrices and
1806        then copying the submatrices. */
1807     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1808   } else {
1809     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1810     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1811   }
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1817 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1818 {
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 #include "petscblaslapack.h"
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatAXPY_MPIAIJ"
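/*
    Computes Y = a*X + Y.  For SAME_NONZERO_PATTERN the value arrays of the diagonal and
    off-diagonal parts are combined directly with BLAS axpy.  For SUBSET_NONZERO_PATTERN the
    diagonal parts use MatAXPY_SeqAIJ() while the off-diagonal parts use an x-to-y index map
    (y->xtoy) that is built once and cached on Y.  All other cases fall back to MatAXPY_Basic().
*/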
1829 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1830 {
1831   PetscErrorCode ierr;
1832   PetscInt       i;
1833   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1834   PetscBLASInt   bnz,one=1;
1835   Mat_SeqAIJ     *x,*y;
1836 
1837   PetscFunctionBegin;
1838   if (str == SAME_NONZERO_PATTERN) {
1839     PetscScalar alpha = a;
1840     x = (Mat_SeqAIJ *)xx->A->data;
1841     y = (Mat_SeqAIJ *)yy->A->data;
1842     bnz = PetscBLASIntCast(x->nz);
1843     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1844     x = (Mat_SeqAIJ *)xx->B->data;
1845     y = (Mat_SeqAIJ *)yy->B->data;
1846     bnz = PetscBLASIntCast(x->nz);
1847     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1848   } else if (str == SUBSET_NONZERO_PATTERN) {
1849     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1850 
1851     x = (Mat_SeqAIJ *)xx->B->data;
1852     y = (Mat_SeqAIJ *)yy->B->data;
1853     if (y->xtoy && y->XtoY != xx->B) {
1854       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1855       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1856     }
1857     if (!y->xtoy) { /* get xtoy */
1858       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1859       y->XtoY = xx->B;
1860       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1861     }
1862     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1863   } else {
1864     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatConjugate_MPIAIJ"
1873 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1874 {
1875 #if defined(PETSC_USE_COMPLEX)
1876   PetscErrorCode ierr;
1877   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1878 
1879   PetscFunctionBegin;
1880   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1881   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1882 #else
1883   PetscFunctionBegin;
1884 #endif
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatRealPart_MPIAIJ"
1890 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1891 {
1892   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1893   PetscErrorCode ierr;
1894 
1895   PetscFunctionBegin;
1896   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1897   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1903 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1904 {
1905   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1910   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #ifdef PETSC_HAVE_PBGL
1915 
1916 #include <boost/parallel/mpi/bsp_process_group.hpp>
1917 #include <boost/graph/distributed/ilu_default_graph.hpp>
1918 #include <boost/graph/distributed/ilu_0_block.hpp>
1919 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1920 #include <boost/graph/distributed/petsc/interface.hpp>
1921 #include <boost/multi_array.hpp>
1922 #include <boost/parallel/distributed_property_map.hpp>
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1926 /*
1927   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1928 */
1929 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1930 {
1931   namespace petsc = boost::distributed::petsc;
1932 
1933   namespace graph_dist = boost::graph::distributed;
1934   using boost::graph::distributed::ilu_default::process_group_type;
1935   using boost::graph::ilu_permuted;
1936 
1937   PetscTruth      row_identity, col_identity;
1938   PetscContainer  c;
1939   PetscInt        m, n, M, N;
1940   PetscErrorCode  ierr;
1941 
1942   PetscFunctionBegin;
1943   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1944   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1945   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1946   if (!row_identity || !col_identity) {
1947     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1948   }
1949 
1950   process_group_type pg;
1951   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1952   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1953   lgraph_type&   level_graph = *lgraph_p;
1954   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1955 
1956   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1957   ilu_permuted(level_graph);
1958 
1959   /* put together the new matrix */
1960   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1961   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1962   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1963   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1964   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1965   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1966   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967 
1968   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1969   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1970   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1976 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1977 {
1978   PetscFunctionBegin;
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "MatSolve_MPIAIJ"
1984 /*
1985   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1986 */
1987 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1988 {
1989   namespace graph_dist = boost::graph::distributed;
1990 
1991   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1992   lgraph_type*   lgraph_p;
1993   PetscContainer c;
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1998   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1999   ierr = VecCopy(b, x);CHKERRQ(ierr);
2000 
2001   PetscScalar* array_x;
2002   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2003   PetscInt sx;
2004   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2005 
2006   PetscScalar* array_b;
2007   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2008   PetscInt sb;
2009   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2010 
2011   lgraph_type&   level_graph = *lgraph_p;
2012   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2013 
2014   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2015   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2016                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2017 
2018   typedef boost::iterator_property_map<array_ref_type::iterator,
2019                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2020   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2021                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2022 
2023   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2024 
2025   PetscFunctionReturn(0);
2026 }
2027 #endif
2028 
2029 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2030   PetscInt       nzlocal,nsends,nrecvs;
2031   PetscMPIInt    *send_rank;
2032   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2033   PetscScalar    *sbuf_a,**rbuf_a;
2034   PetscErrorCode (*MatDestroy)(Mat);
2035 } Mat_Redundant;
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2039 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2040 {
2041   PetscErrorCode       ierr;
2042   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2043   PetscInt             i;
2044 
2045   PetscFunctionBegin;
2046   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2047   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2048   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2049   for (i=0; i<redund->nrecvs; i++){
2050     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2051     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2052   }
2053   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2054   ierr = PetscFree(redund);CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatDestroy_MatRedundant"
2060 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2061 {
2062   PetscErrorCode  ierr;
2063   PetscContainer  container;
2064   Mat_Redundant   *redund=PETSC_NULL;
2065 
2066   PetscFunctionBegin;
2067   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2068   if (container) {
2069     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2070   } else {
2071     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2072   }
2073   A->ops->destroy = redund->MatDestroy;
2074   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2075   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2076   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 #undef __FUNCT__
2081 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
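/*
    Builds a copy of the entire matrix on each subcommunicator.  Roughly: every process packs its
    local rows (row pointers, global column indices, values) into send buffers and exchanges them
    with the corresponding processes of the other subcommunicators in three rounds of messages
    (row counts, column indices, values).  With MAT_INITIAL_MATRIX the buffers and communication
    pattern are cached in a Mat_Redundant container attached to the result, so a later
    MAT_REUSE_MATRIX call only resends the numerical values.
*/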
2082 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2083 {
2084   PetscMPIInt    rank,size;
2085   MPI_Comm       comm=((PetscObject)mat)->comm;
2086   PetscErrorCode ierr;
2087   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2088   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2089   PetscInt       *rowrange=mat->rmap->range;
2090   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2091   Mat            A=aij->A,B=aij->B,C=*matredundant;
2092   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2093   PetscScalar    *sbuf_a;
2094   PetscInt       nzlocal=a->nz+b->nz;
2095   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2096   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2097   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2098   MatScalar      *aworkA,*aworkB;
2099   PetscScalar    *vals;
2100   PetscMPIInt    tag1,tag2,tag3,imdex;
2101   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2102                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2103   MPI_Status     recv_status,*send_status;
2104   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2105   PetscInt       **rbuf_j=PETSC_NULL;
2106   PetscScalar    **rbuf_a=PETSC_NULL;
2107   Mat_Redundant  *redund=PETSC_NULL;
2108   PetscContainer container;
2109 
2110   PetscFunctionBegin;
2111   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2113 
2114   if (reuse == MAT_REUSE_MATRIX) {
2115     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2117     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2118     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2119     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2120     if (container) {
2121       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2122     } else {
2123       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2124     }
2125     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2126 
2127     nsends    = redund->nsends;
2128     nrecvs    = redund->nrecvs;
2129     send_rank = redund->send_rank; recv_rank = send_rank + size;
2130     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2144     recv_rank = send_rank + size;
2145     np_subcomm = size/nsubcomm;
2146     nleftover  = size - nsubcomm*np_subcomm;
2147     nsends = 0; nrecvs = 0;
2148     for (i=0; i<size; i++){ /* i = rank */
2149       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2150         send_rank[nsends] = i; nsends++;
2151         recv_rank[nrecvs++] = i;
2152       }
2153     }
2154     if (rank >= size - nleftover){/* this proc is a leftover processor */
2155       i = size-nleftover-1;
2156       j = 0;
2157       while (j < nsubcomm - nleftover){
2158         send_rank[nsends++] = i;
2159         i--; j++;
2160       }
2161     }
2162 
2163     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2164       for (i=0; i<nleftover; i++){
2165         recv_rank[nrecvs++] = size-nleftover+i;
2166       }
2167     }
2168 
2169     /* allocate sbuf_j, sbuf_a */
2170     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2171     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2172     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2173   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2174 
2175   /* copy mat's local entries into the buffers */
2176   if (reuse == MAT_INITIAL_MATRIX){
2177     rownz_max = 0;
2178     rptr = sbuf_j;
2179     cols = sbuf_j + rend-rstart + 1;
2180     vals = sbuf_a;
2181     rptr[0] = 0;
2182     for (i=0; i<rend-rstart; i++){
2183       row = i + rstart;
2184       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2185       ncols  = nzA + nzB;
2186       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2187       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2188       /* load the column indices for this row into cols */
2189       lwrite = 0;
2190       for (l=0; l<nzB; l++) {
2191         if ((ctmp = bmap[cworkB[l]]) < cstart){
2192           vals[lwrite]   = aworkB[l];
2193           cols[lwrite++] = ctmp;
2194         }
2195       }
2196       for (l=0; l<nzA; l++){
2197         vals[lwrite]   = aworkA[l];
2198         cols[lwrite++] = cstart + cworkA[l];
2199       }
2200       for (l=0; l<nzB; l++) {
2201         if ((ctmp = bmap[cworkB[l]]) >= cend){
2202           vals[lwrite]   = aworkB[l];
2203           cols[lwrite++] = ctmp;
2204         }
2205       }
2206       vals += ncols;
2207       cols += ncols;
2208       rptr[i+1] = rptr[i] + ncols;
2209       if (rownz_max < ncols) rownz_max = ncols;
2210     }
2211     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2212   } else { /* only copy matrix values into sbuf_a */
2213     rptr = sbuf_j;
2214     vals = sbuf_a;
2215     rptr[0] = 0;
2216     for (i=0; i<rend-rstart; i++){
2217       row = i + rstart;
2218       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2219       ncols  = nzA + nzB;
2220       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2221       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2222       lwrite = 0;
2223       for (l=0; l<nzB; l++) {
2224         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2225       }
2226       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2227       for (l=0; l<nzB; l++) {
2228         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2229       }
2230       vals += ncols;
2231       rptr[i+1] = rptr[i] + ncols;
2232     }
2233   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2234 
2235   /* send nzlocal to others, and recv other's nzlocal */
2236   /*--------------------------------------------------*/
2237   if (reuse == MAT_INITIAL_MATRIX){
2238     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2239     s_waits2 = s_waits3 + nsends;
2240     s_waits1 = s_waits2 + nsends;
2241     r_waits1 = s_waits1 + nsends;
2242     r_waits2 = r_waits1 + nrecvs;
2243     r_waits3 = r_waits2 + nrecvs;
2244   } else {
2245     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2246     r_waits3 = s_waits3 + nsends;
2247   }
2248 
2249   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2250   if (reuse == MAT_INITIAL_MATRIX){
2251     /* get new tags to keep the communication clean */
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2253     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2254     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2255     rbuf_nz = sbuf_nz + nsends;
2256 
2257     /* post receives of other's nzlocal */
2258     for (i=0; i<nrecvs; i++){
2259       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2260     }
2261     /* send nzlocal to others */
2262     for (i=0; i<nsends; i++){
2263       sbuf_nz[i] = nzlocal;
2264       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2265     }
2266     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2267     count = nrecvs;
2268     while (count) {
2269       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2270       recv_rank[imdex] = recv_status.MPI_SOURCE;
2271       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2272       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2273 
2274       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2275       rbuf_nz[imdex] += i + 2;
2276       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2277       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2278       count--;
2279     }
2280     /* wait on sends of nzlocal */
2281     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2282     /* send mat->i,j to others, and recv from other's */
2283     /*------------------------------------------------*/
2284     for (i=0; i<nsends; i++){
2285       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2286       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2287     }
2288     /* wait on receives of mat->i,j */
2289     /*------------------------------*/
2290     count = nrecvs;
2291     while (count) {
2292       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2293       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2294       count--;
2295     }
2296     /* wait on sends of mat->i,j */
2297     /*---------------------------*/
2298     if (nsends) {
2299       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2300     }
2301   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2302 
2303   /* post receives, send and receive mat->a */
2304   /*----------------------------------------*/
2305   for (imdex=0; imdex<nrecvs; imdex++) {
2306     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2307   }
2308   for (i=0; i<nsends; i++){
2309     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2310   }
2311   count = nrecvs;
2312   while (count) {
2313     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2314     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2315     count--;
2316   }
2317   if (nsends) {
2318     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2319   }
2320 
2321   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2322 
2323   /* create redundant matrix */
2324   /*-------------------------*/
2325   if (reuse == MAT_INITIAL_MATRIX){
2326     /* compute rownz_max for preallocation */
2327     for (imdex=0; imdex<nrecvs; imdex++){
2328       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2329       rptr = rbuf_j[imdex];
2330       for (i=0; i<j; i++){
2331         ncols = rptr[i+1] - rptr[i];
2332         if (rownz_max < ncols) rownz_max = ncols;
2333       }
2334     }
2335 
2336     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2337     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2338     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2339     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2340     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2341   } else {
2342     C = *matredundant;
2343   }
2344 
2345   /* insert local matrix entries */
2346   rptr = sbuf_j;
2347   cols = sbuf_j + rend-rstart + 1;
2348   vals = sbuf_a;
2349   for (i=0; i<rend-rstart; i++){
2350     row   = i + rstart;
2351     ncols = rptr[i+1] - rptr[i];
2352     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2353     vals += ncols;
2354     cols += ncols;
2355   }
2356   /* insert received matrix entries */
2357   for (imdex=0; imdex<nrecvs; imdex++){
2358     rstart = rowrange[recv_rank[imdex]];
2359     rend   = rowrange[recv_rank[imdex]+1];
2360     rptr = rbuf_j[imdex];
2361     cols = rbuf_j[imdex] + rend-rstart + 1;
2362     vals = rbuf_a[imdex];
2363     for (i=0; i<rend-rstart; i++){
2364       row   = i + rstart;
2365       ncols = rptr[i+1] - rptr[i];
2366       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2367       vals += ncols;
2368       cols += ncols;
2369     }
2370   }
2371   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2373   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2374   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2375   if (reuse == MAT_INITIAL_MATRIX){
2376     PetscContainer container;
2377     *matredundant = C;
2378     /* create a supporting struct and attach it to C for reuse */
2379     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2380     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2382     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2383     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2384 
2385     redund->nzlocal = nzlocal;
2386     redund->nsends  = nsends;
2387     redund->nrecvs  = nrecvs;
2388     redund->send_rank = send_rank;
2389     redund->sbuf_nz = sbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
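/*
    Largest entry in magnitude of each local row: computed separately for the diagonal part
    (into v, with indices shifted to global numbering) and the off-diagonal part (into a
    temporary sequential vector), then merged by keeping whichever candidate has the larger
    absolute value; off-diagonal column indices are mapped to global numbering through a->garray.
*/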
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->rmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718        0
2719 };
2720 
2721 /* ----------------------------------------------------------------------------------------*/
2722 
2723 EXTERN_C_BEGIN
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 EXTERN_C_BEGIN
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 EXTERN_C_END
2752 
2753 #include "petscpc.h"
2754 EXTERN_C_BEGIN
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2758 {
2759   Mat_MPIAIJ     *b;
2760   PetscErrorCode ierr;
2761   PetscInt       i;
2762 
2763   PetscFunctionBegin;
2764   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2765   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2766   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2767   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2768 
2769   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   if (d_nnz) {
2774     for (i=0; i<B->rmap->n; i++) {
2775       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2776     }
2777   }
2778   if (o_nnz) {
2779     for (i=0; i<B->rmap->n; i++) {
2780       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2781     }
2782   }
2783   b = (Mat_MPIAIJ*)B->data;
2784 
2785   if (!B->preallocated) {
2786     /* Explicitly create 2 MATSEQAIJ matrices. */
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2793     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2795   }
2796 
2797   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2799   B->preallocated = PETSC_TRUE;
2800   PetscFunctionReturn(0);
2801 }
2802 EXTERN_C_END
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2806 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2807 {
2808   Mat            mat;
2809   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   *newmat       = 0;
2814   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2815   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2816   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2817   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2818   a    = (Mat_MPIAIJ*)mat->data;
2819 
2820   mat->factor       = matin->factor;
2821   mat->rmap->bs      = matin->rmap->bs;
2822   mat->assembled    = PETSC_TRUE;
2823   mat->insertmode   = NOT_SET_VALUES;
2824   mat->preallocated = PETSC_TRUE;
2825 
2826   a->size           = oldmat->size;
2827   a->rank           = oldmat->rank;
2828   a->donotstash     = oldmat->donotstash;
2829   a->roworiented    = oldmat->roworiented;
2830   a->rowindices     = 0;
2831   a->rowvalues      = 0;
2832   a->getrowactive   = PETSC_FALSE;
2833 
2834   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2835   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2836 
2837   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2838   if (oldmat->colmap) {
2839 #if defined (PETSC_USE_CTABLE)
2840     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2841 #else
2842     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2843     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2844     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2845 #endif
2846   } else a->colmap = 0;
2847   if (oldmat->garray) {
2848     PetscInt len;
2849     len  = oldmat->B->cmap->n;
2850     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2851     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2852     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2853   } else a->garray = 0;
2854 
2855   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2856   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2857   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2858   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2859   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2860   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2861   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2862   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2863   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2864   *newmat = mat;
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #include "petscsys.h"
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatLoad_MPIAIJ"
2872 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2873 {
2874   Mat            A;
2875   PetscScalar    *vals,*svals;
2876   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2877   MPI_Status     status;
2878   PetscErrorCode ierr;
2879   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2880   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2881   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2882   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2883   PetscInt       cend,cstart,n,*rowners;
2884   int            fd;
2885 
2886   PetscFunctionBegin;
2887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2889   if (!rank) {
2890     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2891     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2892     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2893   }
2894 
2895   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2896   M = header[1]; N = header[2];
2897   /* determine ownership of all rows */
2898   m    = M/size + ((M % size) > rank);
2899   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2900   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2901 
2902   /* First process needs enough room for process with most rows */
2903   if (!rank) {
2904     mmax       = rowners[1];
2905     for (i=2; i<=size; i++) {
2906       mmax = PetscMax(mmax,rowners[i]);
2907     }
2908   } else mmax = m;
2909 
2910   rowners[0] = 0;
2911   for (i=2; i<=size; i++) {
2912     rowners[i] += rowners[i-1];
2913   }
2914   rstart = rowners[rank];
2915   rend   = rowners[rank+1];
2916 
2917   /* distribute row lengths to all processors */
2918   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2919   if (!rank) {
2920     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2921     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2922     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2923     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2924     for (j=0; j<m; j++) {
2925       procsnz[0] += ourlens[j];
2926     }
2927     for (i=1; i<size; i++) {
2928       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2929       /* calculate the number of nonzeros on each processor */
2930       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2931         procsnz[i] += rowlengths[j];
2932       }
2933       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2934       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2935     }
2936     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2937   } else {
2938     mpicnt = PetscMPIIntCast(m);
2939     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2940   }
2941 
2942   if (!rank) {
2943     /* determine max buffer needed and allocate it */
2944     maxnz = 0;
2945     for (i=0; i<size; i++) {
2946       maxnz = PetscMax(maxnz,procsnz[i]);
2947     }
2948     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2949 
2950     /* read in my part of the matrix column indices  */
2951     nz   = procsnz[0];
2952     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2953     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2954 
2955     /* read in everyone else's and ship off */
2956     for (i=1; i<size; i++) {
2957       nz     = procsnz[i];
2958       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2959       mpicnt = PetscMPIIntCast(nz);
2960       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2961     }
2962     ierr = PetscFree(cols);CHKERRQ(ierr);
2963   } else {
2964     /* determine buffer space needed for message */
2965     nz = 0;
2966     for (i=0; i<m; i++) {
2967       nz += ourlens[i];
2968     }
2969     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2970 
2971     /* receive message of column indices */
2972     mpicnt = PetscMPIIntCast(nz);
2973     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2974     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2975     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2976     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2977     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2978   }
2979 
2980   /* determine column ownership if matrix is not square */
2981   if (N != M) {
2982     n      = N/size + ((N % size) > rank);
2983     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2984     cstart = cend - n;
2985   } else {
2986     cstart = rstart;
2987     cend   = rend;
2988     n      = cend - cstart;
2989   }
2990 
2991   /* loop over local rows, determining number of off diagonal entries */
2992   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2993   jj = 0;
2994   for (i=0; i<m; i++) {
2995     for (j=0; j<ourlens[i]; j++) {
2996       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2997       jj++;
2998     }
2999   }
3000 
3001   /* create our matrix */
3002   for (i=0; i<m; i++) {
3003     ourlens[i] -= offlens[i];
3004   }
3005   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3006   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3007   ierr = MatSetType(A,type);CHKERRQ(ierr);
3008   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3009 
3010   for (i=0; i<m; i++) {
3011     ourlens[i] += offlens[i];
3012   }
3013 
3014   if (!rank) {
3015     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3016 
3017     /* read in my part of the matrix numerical values  */
3018     nz   = procsnz[0];
3019     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3020 
3021     /* insert into matrix */
3022     jj      = rstart;
3023     smycols = mycols;
3024     svals   = vals;
3025     for (i=0; i<m; i++) {
3026       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3027       smycols += ourlens[i];
3028       svals   += ourlens[i];
3029       jj++;
3030     }
3031 
3032     /* read in other processors and ship out */
3033     for (i=1; i<size; i++) {
3034       nz     = procsnz[i];
3035       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3036       mpicnt = PetscMPIIntCast(nz);
3037       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3038     }
3039     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3040   } else {
3041     /* receive numeric values */
3042     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3043 
3044     /* receive message of values*/
3045     mpicnt = PetscMPIIntCast(nz);
3046     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3047     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3048     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3049     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3050     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3051 
3052     /* insert into matrix */
3053     jj      = rstart;
3054     smycols = mycols;
3055     svals   = vals;
3056     for (i=0; i<m; i++) {
3057       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3058       smycols += ourlens[i];
3059       svals   += ourlens[i];
3060       jj++;
3061     }
3062   }
3063   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3064   ierr = PetscFree(vals);CHKERRQ(ierr);
3065   ierr = PetscFree(mycols);CHKERRQ(ierr);
3066   ierr = PetscFree(rowners);CHKERRQ(ierr);
3067 
3068   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3069   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3070   *newmat = A;
3071   PetscFunctionReturn(0);
3072 }
3073 
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3077 {
3078   PetscErrorCode ierr;
3079   IS             iscol_local;
3080   PetscInt       csize;
3081 
3082   PetscFunctionBegin;
3083   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3084   if (call == MAT_REUSE_MATRIX) {
3085     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3086     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3087   } else {
3088     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3089   }
3090   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3091   if (call == MAT_INITIAL_MATRIX) {
3092     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3093     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3094   }
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3100 /*
3101     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3102   on each process, and then the end result obtained by concatenating the local matrices.
3103   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3104 
3105   Note: This requires a sequential iscol with all indices.
3106 */
3107 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3108 {
3109   PetscErrorCode ierr;
3110   PetscMPIInt    rank,size;
3111   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3112   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3113   Mat            *local,M,Mreuse;
3114   MatScalar      *vwork,*aa;
3115   MPI_Comm       comm = ((PetscObject)mat)->comm;
3116   Mat_SeqAIJ     *aij;
3117 
3118 
3119   PetscFunctionBegin;
3120   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3121   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3122 
3123   if (call ==  MAT_REUSE_MATRIX) {
3124     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3125     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3126     local = &Mreuse;
3127     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3128   } else {
3129     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3130     Mreuse = *local;
3131     ierr   = PetscFree(local);CHKERRQ(ierr);
3132   }
3133 
3134   /*
3135       m - number of local rows
3136       n - number of columns (same on all processors)
3137       rstart - first row in new global matrix generated
3138   */
3139   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3140   if (call == MAT_INITIAL_MATRIX) {
3141     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3142     ii  = aij->i;
3143     jj  = aij->j;
3144 
3145     /*
3146         Determine the number of non-zeros in the diagonal and off-diagonal
3147         portions of the matrix in order to do correct preallocation
3148     */
3149 
3150     /* first get start and end of "diagonal" columns */
3151     if (csize == PETSC_DECIDE) {
3152       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3153       if (mglobal == n) { /* square matrix */
3154 	nlocal = m;
3155       } else {
3156         nlocal = n/size + ((n % size) > rank);
3157       }
3158     } else {
3159       nlocal = csize;
3160     }
3161     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3162     rstart = rend - nlocal;
3163     if (rank == size - 1 && rend != n) {
3164       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3165     }
3166 
3167     /* next, compute all the lengths */
3168     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3169     olens = dlens + m;
3170     for (i=0; i<m; i++) {
3171       jend = ii[i+1] - ii[i];
3172       olen = 0;
3173       dlen = 0;
3174       for (j=0; j<jend; j++) {
3175         if (*jj < rstart || *jj >= rend) olen++;
3176         else dlen++;
3177         jj++;
3178       }
3179       olens[i] = olen;
3180       dlens[i] = dlen;
3181     }
3182     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3183     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3184     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3185     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3186     ierr = PetscFree(dlens);CHKERRQ(ierr);
3187   } else {
3188     PetscInt ml,nl;
3189 
3190     M = *newmat;
3191     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3192     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3193     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3194     /*
3195          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3196        rather than the slower MatSetValues().
3197     */
3198     M->was_assembled = PETSC_TRUE;
3199     M->assembled     = PETSC_FALSE;
3200   }
3201   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3202   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3203   ii  = aij->i;
3204   jj  = aij->j;
3205   aa  = aij->a;
3206   for (i=0; i<m; i++) {
3207     row   = rstart + i;
3208     nz    = ii[i+1] - ii[i];
3209     cwork = jj;     jj += nz;
3210     vwork = aa;     aa += nz;
3211     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3212   }
3213 
3214   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3215   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216   *newmat = M;
3217 
3218   /* save submatrix used in processor for next request */
3219   if (call ==  MAT_INITIAL_MATRIX) {
3220     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3221     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3222   }
3223 
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 EXTERN_C_BEGIN
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3230 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3231 {
3232   PetscInt       m,cstart, cend,j,nnz,i,d;
3233   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3234   const PetscInt *JJ;
3235   PetscScalar    *values;
3236   PetscErrorCode ierr;
3237 
3238   PetscFunctionBegin;
3239   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3240 
3241   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3243   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3244   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3245   m      = B->rmap->n;
3246   cstart = B->cmap->rstart;
3247   cend   = B->cmap->rend;
3248   rstart = B->rmap->rstart;
3249 
3250   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3251   o_nnz = d_nnz + m;
3252 
3253 #if defined(PETSC_USE_DEBUGGING)
3254   for (i=0; i<m; i++) {
3255     nnz     = Ii[i+1]- Ii[i];
3256     JJ      = J + Ii[i];
3257     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3258     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3259     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3260   }
3261 #endif
3262 
3263   for (i=0; i<m; i++) {
3264     nnz     = Ii[i+1]- Ii[i];
3265     JJ      = J + Ii[i];
3266     nnz_max = PetscMax(nnz_max,nnz);
3267     d       = 0;
3268     for (j=0; j<nnz; j++) {
3269       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3270     }
3271     d_nnz[i] = d;
3272     o_nnz[i] = nnz - d;
3273   }
3274   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3275   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3276 
3277   if (v) values = (PetscScalar*)v;
3278   else {
3279     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3280     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3281   }
3282 
3283   for (i=0; i<m; i++) {
3284     ii   = i + rstart;
3285     nnz  = Ii[i+1]- Ii[i];
3286     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3287   }
3288   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290 
3291   if (!v) {
3292     ierr = PetscFree(values);CHKERRQ(ierr);
3293   }
3294   PetscFunctionReturn(0);
3295 }
3296 EXTERN_C_END
3297 
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3300 /*@
3301    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3302    (the default parallel PETSc format).
3303 
3304    Collective on MPI_Comm
3305 
3306    Input Parameters:
3307 +  B - the matrix
3308 .  i - the indices into j for the start of each local row (starts with zero)
3309 .  j - the column indices for each local row (starts with zero)
3310 -  v - optional values in the matrix
3311 
3312    Level: developer
3313 
3314    Notes:
3315        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3316      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3317      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3318 
3319        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3320 
3321        The format used for the sparse matrix input is equivalent to a
3322     row-major ordering, i.e. for the following matrix, the input data expected is
3323     as shown:
3324 
3325         1 0 0
3326         2 0 3     P0
3327        -------
3328         4 5 6     P1
3329 
3330      Process0 [P0]: rows_owned=[0,1]
3331         i =  {0,1,3}  [size = nrow+1  = 2+1]
3332         j =  {0,0,2}  [size = nz = 3]
3333         v =  {1,2,3}  [size = nz = 3]
3334 
3335      Process1 [P1]: rows_owned=[2]
3336         i =  {0,3}    [size = nrow+1  = 1+1]
3337         j =  {0,1,2}  [size = nz = 3]
3338         v =  {4,5,6}  [size = nz = 3]
3339 
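     Example usage (a minimal sketch for the Process0 data above; the creation and sizing
     calls are illustrative assumptions and error checking is omitted). Here i holds the
     local row pointers, j the global column indices, and v the values:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,PETSC_DETERMINE,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
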
3340 .keywords: matrix, aij, compressed row, sparse, parallel
3341 
3342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3343           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3344 @*/
3345 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3346 {
3347   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3348 
3349   PetscFunctionBegin;
3350   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3351   if (f) {
3352     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3353   }
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3359 /*@C
3360    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3361    (the default parallel PETSc format).  For good matrix assembly performance
3362    the user should preallocate the matrix storage by setting the parameters
3363    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3364    performance can be increased by more than a factor of 50.
3365 
3366    Collective on MPI_Comm
3367 
3368    Input Parameters:
3369 +  A - the matrix
3370 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3371            (same value is used for all local rows)
3372 .  d_nnz - array containing the number of nonzeros in the various rows of the
3373            DIAGONAL portion of the local submatrix (possibly different for each row)
3374            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3375            The size of this array is equal to the number of local rows, i.e 'm'.
3376            You must leave room for the diagonal entry even if it is zero.
3377 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3378            submatrix (same value is used for all local rows).
3379 -  o_nnz - array containing the number of nonzeros in the various rows of the
3380            OFF-DIAGONAL portion of the local submatrix (possibly different for
3381            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3382            structure. The size of this array is equal to the number
3383            of local rows, i.e 'm'.
3384 
3385    If the *_nnz parameter is given then the *_nz parameter is ignored
3386 
3387    The AIJ format (also called the Yale sparse matrix format or
3388    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3389    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3390 
3391    The parallel matrix is partitioned such that the first m0 rows belong to
3392    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3393    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3394 
3395    The DIAGONAL portion of the local submatrix of a processor can be defined
3396    as the submatrix which is obtained by extracting the part corresponding
3397    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3398    first row that belongs to the processor, and r2 is the last row belonging
3399    to this processor. This is a square mxm matrix. The remaining portion
3400    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3401 
3402    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3403 
3404    You can call MatGetInfo() to get information on how effective the preallocation was;
3405    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3406    You can also run with the option -info and look for messages with the string
3407    malloc in them to see if additional memory allocation was needed.
3408 
3409    Example usage:
3410 
3411    Consider the following 8x8 matrix with 34 non-zero values, that is
3412    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3413    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3414    as follows:
3415 
3416 .vb
3417             1  2  0  |  0  3  0  |  0  4
3418     Proc0   0  5  6  |  7  0  0  |  8  0
3419             9  0 10  | 11  0  0  | 12  0
3420     -------------------------------------
3421            13  0 14  | 15 16 17  |  0  0
3422     Proc1   0 18  0  | 19 20 21  |  0  0
3423             0  0  0  | 22 23  0  | 24  0
3424     -------------------------------------
3425     Proc2  25 26 27  |  0  0 28  | 29  0
3426            30  0  0  | 31 32 33  |  0 34
3427 .ve
3428 
3429    This can be represented as a collection of submatrices as:
3430 
3431 .vb
3432       A B C
3433       D E F
3434       G H I
3435 .ve
3436 
3437    Where the submatrices A,B,C are owned by proc0, D,E,F are
3438    owned by proc1, G,H,I are owned by proc2.
3439 
3440    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3441    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3442    The 'M','N' parameters are 8,8, and have the same values on all procs.
3443 
3444    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3445    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3446    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3447    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3448    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3449    matrix, and [DF] as another SeqAIJ matrix.
3450 
3451    When d_nz, o_nz parameters are specified, d_nz storage elements are
3452    allocated for every row of the local diagonal submatrix, and o_nz
3453    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3454    One way to choose d_nz and o_nz is to use the max nonzeros per local
3455    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3456    In this case, the values of d_nz,o_nz are:
3457 .vb
3458      proc0 : dnz = 2, o_nz = 2
3459      proc1 : dnz = 3, o_nz = 2
3460      proc2 : dnz = 1, o_nz = 4
3461 .ve
3462    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3463    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3464    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3465    34 values.
3466 
3467    When d_nnz, o_nnz parameters are specified, the storage is specified
3468    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3469    In the above case the values for d_nnz,o_nnz are:
3470 .vb
3471      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3472      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3473      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3474 .ve
3475    Here the space allocated is the sum of all the above values, i.e. 34, and
3476    hence pre-allocation is perfect.
3477 
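   A sketch of the corresponding call on proc1 (illustrative only; it assumes A has already
   been created with MatCreate(), sized with MatSetSizes(A,3,3,8,8), set to type MATMPIAIJ,
   and error checking is omitted). The arrays are the per-row counts listed above, d_nnz for
   the DIAGONAL block [E] and o_nnz for the OFF-DIAGONAL block [DF]:

.vb
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
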
3478    Level: intermediate
3479 
3480 .keywords: matrix, aij, compressed row, sparse, parallel
3481 
3482 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3483           MPIAIJ, MatGetInfo()
3484 @*/
3485 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3486 {
3487   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3488 
3489   PetscFunctionBegin;
3490   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3491   if (f) {
3492     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3493   }
3494   PetscFunctionReturn(0);
3495 }
3496 
3497 #undef __FUNCT__
3498 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3499 /*@
3500      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3501          CSR format the local rows.
3502 
3503    Collective on MPI_Comm
3504 
3505    Input Parameters:
3506 +  comm - MPI communicator
3507 .  m - number of local rows (Cannot be PETSC_DECIDE)
3508 .  n - This value should be the same as the local size used in creating the
3509        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3510        calculated if N is given) For square matrices n is almost always m.
3511 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3512 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3513 .   i - row indices
3514 .   j - column indices
3515 -   a - matrix values
3516 
3517    Output Parameter:
3518 .   mat - the matrix
3519 
3520    Level: intermediate
3521 
3522    Notes:
3523        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3524      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3525      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3526 
3527        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3528 
3529        The format used for the sparse matrix input is equivalent to a
3530     row-major ordering, i.e. for the following matrix, the input data expected is
3531     as shown:
3532 
3533         1 0 0
3534         2 0 3     P0
3535        -------
3536         4 5 6     P1
3537 
3538      Process0 [P0]: rows_owned=[0,1]
3539         i =  {0,1,3}  [size = nrow+1  = 2+1]
3540         j =  {0,0,2}  [size = nz = 3]
3541         v =  {1,2,3}  [size = nz = 3]
3542 
3543      Process1 [P1]: rows_owned=[2]
3544         i =  {0,3}    [size = nrow+1  = 1+1]
3545         j =  {0,1,2}  [size = nz = 3]
3546         v =  {4,5,6}  [size = nz = 3]
3547 
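     Example usage (a minimal sketch for the Process0 data above; variable names are
     illustrative and error checking is omitted). Here i holds the local row pointers,
     j the global column indices, and a the values:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
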
3548 .keywords: matrix, aij, compressed row, sparse, parallel
3549 
3550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3551           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3552 @*/
3553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3554 {
3555   PetscErrorCode ierr;
3556 
3557   PetscFunctionBegin;
3558   if (i[0]) {
3559     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3560   }
3561   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3562   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3563   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3564   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3565   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 #undef __FUNCT__
3570 #define __FUNCT__ "MatCreateMPIAIJ"
3571 /*@C
3572    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3573    (the default parallel PETSc format).  For good matrix assembly performance
3574    the user should preallocate the matrix storage by setting the parameters
3575    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3576    performance can be increased by more than a factor of 50.
3577 
3578    Collective on MPI_Comm
3579 
3580    Input Parameters:
3581 +  comm - MPI communicator
3582 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3583            This value should be the same as the local size used in creating the
3584            y vector for the matrix-vector product y = Ax.
3585 .  n - This value should be the same as the local size used in creating the
3586        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3587        calculated if N is given) For square matrices n is almost always m.
3588 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3589 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3590 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3591            (same value is used for all local rows)
3592 .  d_nnz - array containing the number of nonzeros in the various rows of the
3593            DIAGONAL portion of the local submatrix (possibly different for each row)
3594            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3595            The size of this array is equal to the number of local rows, i.e 'm'.
3596            You must leave room for the diagonal entry even if it is zero.
3597 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3598            submatrix (same value is used for all local rows).
3599 -  o_nnz - array containing the number of nonzeros in the various rows of the
3600            OFF-DIAGONAL portion of the local submatrix (possibly different for
3601            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3602            structure. The size of this array is equal to the number
3603            of local rows, i.e 'm'.
3604 
3605    Output Parameter:
3606 .  A - the matrix
3607 
3608    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3609    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3610    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3611 
3612    Notes:
3613    If the *_nnz parameter is given then the *_nz parameter is ignored
3614 
3615    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3616    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3617    storage requirements for this matrix.
3618 
3619    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3620    processor then it must be used on all processors that share the object for
3621    that argument.
3622 
3623    The user MUST specify either the local or global matrix dimensions
3624    (possibly both).
3625 
3626    The parallel matrix is partitioned across processors such that the
3627    first m0 rows belong to process 0, the next m1 rows belong to
3628    process 1, the next m2 rows belong to process 2 etc.. where
3629    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3630    values corresponding to [m x N] submatrix.
3631 
3632    The columns are logically partitioned with the n0 columns belonging
3633    to 0th partition, the next n1 columns belonging to the next
3634    partition etc., where n0,n1,n2... are the input parameter 'n'.
3635 
3636    The DIAGONAL portion of the local submatrix on any given processor
3637    is the submatrix corresponding to the rows and columns m,n
3638    corresponding to the given processor. i.e diagonal matrix on
3639    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3640    etc. The remaining portion of the local submatrix [m x (N-n)]
3641    constitute the OFF-DIAGONAL portion. The example below better
3642    illustrates this concept.
3643 
3644    For a square global matrix we define each processor's diagonal portion
3645    to be its local rows and the corresponding columns (a square submatrix);
3646    each processor's off-diagonal portion encompasses the remainder of the
3647    local matrix (a rectangular submatrix).
3648 
3649    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3650 
3651    When calling this routine with a single process communicator, a matrix of
3652    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3653    type of communicator, use the construction mechanism:
3654      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3655 
3656    By default, this format uses inodes (identical nodes) when possible.
3657    We search for consecutive rows with the same nonzero structure, thereby
3658    reusing matrix information to achieve increased efficiency.
3659 
3660    Options Database Keys:
3661 +  -mat_no_inode  - Do not use inodes
3662 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3663 -  -mat_aij_oneindex - Internally use indexing starting at 1
3664         rather than 0.  Note that when calling MatSetValues(),
3665         the user still MUST index entries starting at 0!
3666 
3667 
3668    Example usage:
3669 
3670    Consider the following 8x8 matrix with 34 non-zero values, that is
3671    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3672    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3673    as follows:
3674 
3675 .vb
3676             1  2  0  |  0  3  0  |  0  4
3677     Proc0   0  5  6  |  7  0  0  |  8  0
3678             9  0 10  | 11  0  0  | 12  0
3679     -------------------------------------
3680            13  0 14  | 15 16 17  |  0  0
3681     Proc1   0 18  0  | 19 20 21  |  0  0
3682             0  0  0  | 22 23  0  | 24  0
3683     -------------------------------------
3684     Proc2  25 26 27  |  0  0 28  | 29  0
3685            30  0  0  | 31 32 33  |  0 34
3686 .ve
3687 
3688    This can be represented as a collection of submatrices as:
3689 
3690 .vb
3691       A B C
3692       D E F
3693       G H I
3694 .ve
3695 
3696    Where the submatrices A,B,C are owned by proc0, D,E,F are
3697    owned by proc1, G,H,I are owned by proc2.
3698 
3699    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3700    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3701    The 'M','N' parameters are 8,8, and have the same values on all procs.
3702 
3703    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3704    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3705    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3706    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3707    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3708    matrix, and [DF] as another SeqAIJ matrix.
3709 
3710    When d_nz, o_nz parameters are specified, d_nz storage elements are
3711    allocated for every row of the local diagonal submatrix, and o_nz
3712    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3713    One way to choose d_nz and o_nz is to use the max nonzeros per local
3714    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3715    In this case, the values of d_nz,o_nz are:
3716 .vb
3717      proc0 : dnz = 2, o_nz = 2
3718      proc1 : dnz = 3, o_nz = 2
3719      proc2 : dnz = 1, o_nz = 4
3720 .ve
3721    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3722    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3723    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3724    34 values.
3725 
3726    When d_nnz, o_nnz parameters are specified, the storage is specified
3727    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3728    In the above case the values for d_nnz,o_nnz are:
3729 .vb
3730      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3731      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3732      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3733 .ve
3734    Here the space allocated is the sum of all the above values, i.e. 34, and
3735    hence pre-allocation is perfect.
3736 
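   A sketch of the corresponding creation call on proc1 (illustrative only; each process
   passes its own local sizes and per-row counts, and error checking is omitted). Here
   d_nnz describes the DIAGONAL block [E] and o_nnz the OFF-DIAGONAL block [DF]:

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
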
3737    Level: intermediate
3738 
3739 .keywords: matrix, aij, compressed row, sparse, parallel
3740 
3741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3742           MPIAIJ, MatCreateMPIAIJWithArrays()
3743 @*/
3744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3745 {
3746   PetscErrorCode ierr;
3747   PetscMPIInt    size;
3748 
3749   PetscFunctionBegin;
3750   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3751   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3752   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3753   if (size > 1) {
3754     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3755     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3756   } else {
3757     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3758     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3759   }
3760   PetscFunctionReturn(0);
3761 }
3762 
3763 #undef __FUNCT__
3764 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3766 {
3767   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3768 
3769   PetscFunctionBegin;
3770   *Ad     = a->A;
3771   *Ao     = a->B;
3772   *colmap = a->garray;
3773   PetscFunctionReturn(0);
3774 }
3775 
3776 #undef __FUNCT__
3777 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3778 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3779 {
3780   PetscErrorCode ierr;
3781   PetscInt       i;
3782   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3783 
3784   PetscFunctionBegin;
3785   if (coloring->ctype == IS_COLORING_GLOBAL) {
3786     ISColoringValue *allcolors,*colors;
3787     ISColoring      ocoloring;
3788 
3789     /* set coloring for diagonal portion */
3790     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3791 
3792     /* set coloring for off-diagonal portion */
3793     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3794     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3795     for (i=0; i<a->B->cmap->n; i++) {
3796       colors[i] = allcolors[a->garray[i]];
3797     }
3798     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3799     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3800     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3801     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3802   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3803     ISColoringValue *colors;
3804     PetscInt        *larray;
3805     ISColoring      ocoloring;
3806 
3807     /* set coloring for diagonal portion */
3808     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3809     for (i=0; i<a->A->cmap->n; i++) {
3810       larray[i] = i + A->cmap->rstart;
3811     }
3812     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3813     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3814     for (i=0; i<a->A->cmap->n; i++) {
3815       colors[i] = coloring->colors[larray[i]];
3816     }
3817     ierr = PetscFree(larray);CHKERRQ(ierr);
3818     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3819     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3820     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3821 
3822     /* set coloring for off-diagonal portion */
3823     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3824     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3825     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3826     for (i=0; i<a->B->cmap->n; i++) {
3827       colors[i] = coloring->colors[larray[i]];
3828     }
3829     ierr = PetscFree(larray);CHKERRQ(ierr);
3830     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3831     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3832     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3833   } else {
3834     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3835   }
3836 
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 #if defined(PETSC_HAVE_ADIC)
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3843 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3844 {
3845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3846   PetscErrorCode ierr;
3847 
3848   PetscFunctionBegin;
3849   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3850   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3851   PetscFunctionReturn(0);
3852 }
3853 #endif
3854 
3855 #undef __FUNCT__
3856 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3857 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3858 {
3859   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3860   PetscErrorCode ierr;
3861 
3862   PetscFunctionBegin;
3863   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3864   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3865   PetscFunctionReturn(0);
3866 }
3867 
3868 #undef __FUNCT__
3869 #define __FUNCT__ "MatMerge"
3870 /*@
3871       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3872                  matrices from each processor
3873 
3874     Collective on MPI_Comm
3875 
3876    Input Parameters:
3877 +    comm - the communicator the parallel matrix will live on
3878 .    inmat - the input sequential matrix on each process
3879 .    n - number of local columns (or PETSC_DECIDE)
3880 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3881 
3882    Output Parameter:
3883 .    outmat - the parallel matrix generated
3884 
3885     Level: advanced
3886 
3887    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3888 
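   Example usage (a minimal sketch; seqA is assumed to have been assembled by each process
   beforehand with the same number of columns everywhere, and it is destroyed by this routine):

.vb
     Mat seqA,parA;
     MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve
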
3889 @*/
3890 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3891 {
3892   PetscErrorCode ierr;
3893   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3894   PetscInt       *indx;
3895   PetscScalar    *values;
3896 
3897   PetscFunctionBegin;
3898   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3899   if (scall == MAT_INITIAL_MATRIX){
3900     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3901     if (n == PETSC_DECIDE){
3902       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3903     }
3904     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3905     rstart -= m;
3906 
3907     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3908     for (i=0;i<m;i++) {
3909       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3910       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3911       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3912     }
3913     /* This routine will ONLY return MPIAIJ type matrix */
3914     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3915     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3916     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3917     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3918     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3919 
3920   } else if (scall == MAT_REUSE_MATRIX){
3921     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3922   } else {
3923     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3924   }
3925 
3926   for (i=0;i<m;i++) {
3927     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3928     Ii    = i + rstart;
3929     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3930     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3931   }
3932   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3933   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935 
3936   PetscFunctionReturn(0);
3937 }
3938 
3939 #undef __FUNCT__
3940 #define __FUNCT__ "MatFileSplit"
3941 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3942 {
3943   PetscErrorCode    ierr;
3944   PetscMPIInt       rank;
3945   PetscInt          m,N,i,rstart,nnz;
3946   size_t            len;
3947   const PetscInt    *indx;
3948   PetscViewer       out;
3949   char              *name;
3950   Mat               B;
3951   const PetscScalar *values;
3952 
3953   PetscFunctionBegin;
3954   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3955   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3956   /* Should this be the type of the diagonal block of A? */
3957   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3958   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3959   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3960   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3961   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3962   for (i=0;i<m;i++) {
3963     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3964     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3965     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3966   }
3967   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3968   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3969 
3970   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3971   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3972   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3973   sprintf(name,"%s.%d",outfile,rank);
3974   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3975   ierr = PetscFree(name);CHKERRQ(ierr);
3976   ierr = MatView(B,out);CHKERRQ(ierr);
3977   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3978   ierr = MatDestroy(B);CHKERRQ(ierr);
3979   PetscFunctionReturn(0);
3980 }
3981 
3982 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3983 #undef __FUNCT__
3984 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3985 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3986 {
3987   PetscErrorCode       ierr;
3988   Mat_Merge_SeqsToMPI  *merge = PETSC_NULL;
3989   PetscContainer       container;
3990 
3991   PetscFunctionBegin;
3992   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3993   if (container) {
3994     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4005     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4006 
4007     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4008     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4009   }
4010   ierr = PetscFree(merge);CHKERRQ(ierr);
4011 
4012   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 #include "../src/mat/utils/freespace.h"
4017 #include "petscbt.h"
4018 
4019 #undef __FUNCT__
4020 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4021 /*@C
4022       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4023                  matrices from each processor
4024 
4025     Collective on MPI_Comm
4026 
4027    Input Parameters:
4028 +    comm - the communicator the parallel matrix will live on
4029 .    seqmat - the input sequential matrices
4030 .    m - number of local rows (or PETSC_DECIDE)
4031 .    n - number of local columns (or PETSC_DECIDE)
4032 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4033 
4034    Output Parameter:
4035 .    mpimat - the parallel matrix generated
4036 
4037     Level: advanced
4038 
4039    Notes:
4040      The dimensions of the sequential matrix in each processor MUST be the same.
4041      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4042      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
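
   Example usage (an illustrative sketch, not part of the original manual page; it assumes each
   process has already created and assembled a SeqAIJ matrix Aseq of identical dimensions):

      Mat            Aseq,Ampi;
      PetscErrorCode ierr;
      ... build Aseq with MatCreateSeqAIJ()/MatSetValues() and assemble it ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
      ... use Ampi; per the Notes above, Aseq is released together with Ampi ...
      ierr = MatDestroy(Ampi);CHKERRQ(ierr);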
4043 @*/
4044 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4045 {
4046   PetscErrorCode       ierr;
4047   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4048   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4049   PetscMPIInt          size,rank,taga,*len_s;
4050   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4051   PetscInt             proc,m;
4052   PetscInt             **buf_ri,**buf_rj;
4053   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4054   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4055   MPI_Request          *s_waits,*r_waits;
4056   MPI_Status           *status;
4057   MatScalar            *aa=a->a;
4058   MatScalar            **abuf_r,*ba_i;
4059   Mat_Merge_SeqsToMPI  *merge;
4060   PetscContainer       container;
4061 
4062   PetscFunctionBegin;
4063   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4064 
4065   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4066   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4067 
4068   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4069   if (container) {
4070     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4071   }
4072   bi     = merge->bi;
4073   bj     = merge->bj;
4074   buf_ri = merge->buf_ri;
4075   buf_rj = merge->buf_rj;
4076 
4077   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4078   owners = merge->rowmap->range;
4079   len_s  = merge->len_s;
4080 
4081   /* send and recv matrix values */
4082   /*-----------------------------*/
4083   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4084   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4085 
4086   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4087   for (proc=0,k=0; proc<size; proc++){
4088     if (!len_s[proc]) continue;
4089     i = owners[proc];
4090     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4091     k++;
4092   }
4093 
4094   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4095   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4096   ierr = PetscFree(status);CHKERRQ(ierr);
4097 
4098   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4099   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4100 
4101   /* insert mat values of mpimat */
4102   /*----------------------------*/
4103   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4104   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4105   nextrow = buf_ri_k + merge->nrecv;
4106   nextai  = nextrow + merge->nrecv;
4107 
4108   for (k=0; k<merge->nrecv; k++){
4109     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4110     nrows = *(buf_ri_k[k]);
4111     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4112     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure offsets of the k-th recved message */
4113   }
4114 
4115   /* set values of ba */
4116   m = merge->rowmap->n;
4117   for (i=0; i<m; i++) {
4118     arow = owners[rank] + i;
4119     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4120     bnzi = bi[i+1] - bi[i];
4121     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4122 
4123     /* add local non-zero vals of this proc's seqmat into ba */
4124     anzi = ai[arow+1] - ai[arow];
4125     aj   = a->j + ai[arow];
4126     aa   = a->a + ai[arow];
4127     nextaj = 0;
4128     for (j=0; nextaj<anzi; j++){
4129       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4130         ba_i[j] += aa[nextaj++];
4131       }
4132     }
4133 
4134     /* add received vals into ba */
4135     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4136       /* i-th row */
4137       if (i == *nextrow[k]) {
4138         anzi = *(nextai[k]+1) - *nextai[k];
4139         aj   = buf_rj[k] + *(nextai[k]);
4140         aa   = abuf_r[k] + *(nextai[k]);
4141         nextaj = 0;
4142         for (j=0; nextaj<anzi; j++){
4143           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4144             ba_i[j] += aa[nextaj++];
4145           }
4146         }
4147         nextrow[k]++; nextai[k]++;
4148       }
4149     }
4150     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4151   }
4152   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4153   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4154 
4155   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4156   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4157   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4158   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4159   PetscFunctionReturn(0);
4160 }
4161 
4162 #undef __FUNCT__
4163 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4164 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4165 {
4166   PetscErrorCode       ierr;
4167   Mat                  B_mpi;
4168   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4169   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4170   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4171   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4172   PetscInt             len,proc,*dnz,*onz;
4173   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4174   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4175   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4176   MPI_Status           *status;
4177   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4178   PetscBT              lnkbt;
4179   Mat_Merge_SeqsToMPI  *merge;
4180   PetscContainer       container;
4181 
4182   PetscFunctionBegin;
4183   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4184 
4185   /* make sure it is a PETSc comm */
4186   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4187   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4188   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4189 
4190   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4191   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4192 
4193   /* determine row ownership */
4194   /*---------------------------------------------------------*/
4195   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4196   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4197   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4198   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4199   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4200   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4201   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4202 
4203   m      = merge->rowmap->n;
4204   M      = merge->rowmap->N;
4205   owners = merge->rowmap->range;
4206 
4207   /* determine the number of messages to send, their lengths */
4208   /*---------------------------------------------------------*/
4209   len_s  = merge->len_s;
4210 
4211   len = 0;  /* length of buf_si[] */
4212   merge->nsend = 0;
4213   for (proc=0; proc<size; proc++){
4214     len_si[proc] = 0;
4215     if (proc == rank){
4216       len_s[proc] = 0;
4217     } else {
4218       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4219       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4220     }
4221     if (len_s[proc]) {
4222       merge->nsend++;
4223       nrows = 0;
4224       for (i=owners[proc]; i<owners[proc+1]; i++){
4225         if (ai[i+1] > ai[i]) nrows++;
4226       }
4227       len_si[proc] = 2*(nrows+1);
4228       len += len_si[proc];
4229     }
4230   }
4231 
4232   /* determine the number and length of messages to receive for ij-structure */
4233   /*-------------------------------------------------------------------------*/
4234   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4235   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4236 
4237   /* post the Irecv of j-structure */
4238   /*-------------------------------*/
4239   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4240   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4241 
4242   /* post the Isend of j-structure */
4243   /*--------------------------------*/
4244   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4245   sj_waits = si_waits + merge->nsend;
4246 
4247   for (proc=0, k=0; proc<size; proc++){
4248     if (!len_s[proc]) continue;
4249     i = owners[proc];
4250     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4251     k++;
4252   }
4253 
4254   /* receives and sends of j-structure are complete */
4255   /*------------------------------------------------*/
4256   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4257   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4258 
4259   /* send and recv i-structure */
4260   /*---------------------------*/
4261   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4262   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4263 
4264   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4265   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4266   for (proc=0,k=0; proc<size; proc++){
4267     if (!len_s[proc]) continue;
4268     /* form outgoing message for i-structure:
4269          buf_si[0]:                 nrows to be sent
4270                [1:nrows]:           row indices (local to the receiving process)
4271                [nrows+1:2*nrows+1]: i-structure index
4272     */
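    /* Illustrative example (not in the original source): if the destination [proc] owns global
       rows 10..13 and only rows 10 and 12 have nonzeros (3 and 2 entries respectively), then
       buf_si = {2, 0, 2, 0, 3, 5}: nrows = 2, local row indices {0,2}, i-structure offsets {0,3,5}. */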
4273     /*-------------------------------------------*/
4274     nrows = len_si[proc]/2 - 1;
4275     buf_si_i    = buf_si + nrows+1;
4276     buf_si[0]   = nrows;
4277     buf_si_i[0] = 0;
4278     nrows = 0;
4279     for (i=owners[proc]; i<owners[proc+1]; i++){
4280       anzi = ai[i+1] - ai[i];
4281       if (anzi) {
4282         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4283         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4284         nrows++;
4285       }
4286     }
4287     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4288     k++;
4289     buf_si += len_si[proc];
4290   }
4291 
4292   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4293   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4294 
4295   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4296   for (i=0; i<merge->nrecv; i++){
4297     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4298   }
4299 
4300   ierr = PetscFree(len_si);CHKERRQ(ierr);
4301   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4302   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4303   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4304   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4305   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4306   ierr = PetscFree(status);CHKERRQ(ierr);
4307 
4308   /* compute a local seq matrix in each processor */
4309   /*----------------------------------------------*/
4310   /* allocate bi array and free space for accumulating nonzero column info */
4311   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4312   bi[0] = 0;
4313 
4314   /* create and initialize a linked list */
4315   nlnk = N+1;
4316   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4317 
4318   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4319   len = 0;
4320   len  = ai[owners[rank+1]] - ai[owners[rank]];
4321   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4322   current_space = free_space;
4323 
4324   /* determine symbolic info for each local row */
4325   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4326   nextrow = buf_ri_k + merge->nrecv;
4327   nextai  = nextrow + merge->nrecv;
4328   for (k=0; k<merge->nrecv; k++){
4329     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4330     nrows = *buf_ri_k[k];
4331     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4332     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure offsets of the k-th recved message */
4333   }
4334 
4335   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4336   len = 0;
4337   for (i=0;i<m;i++) {
4338     bnzi   = 0;
4339     /* add local non-zero cols of this proc's seqmat into lnk */
4340     arow   = owners[rank] + i;
4341     anzi   = ai[arow+1] - ai[arow];
4342     aj     = a->j + ai[arow];
4343     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4344     bnzi += nlnk;
4345     /* add received col data into lnk */
4346     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4347       if (i == *nextrow[k]) { /* i-th row */
4348         anzi = *(nextai[k]+1) - *nextai[k];
4349         aj   = buf_rj[k] + *nextai[k];
4350         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4351         bnzi += nlnk;
4352         nextrow[k]++; nextai[k]++;
4353       }
4354     }
4355     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4356 
4357     /* if free space is not available, make more free space */
4358     if (current_space->local_remaining<bnzi) {
4359       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4360       nspacedouble++;
4361     }
4362     /* copy data into free space, then initialize lnk */
4363     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4364     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4365 
4366     current_space->array           += bnzi;
4367     current_space->local_used      += bnzi;
4368     current_space->local_remaining -= bnzi;
4369 
4370     bi[i+1] = bi[i] + bnzi;
4371   }
4372 
4373   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4374 
4375   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4376   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4377   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4378 
4379   /* create symbolic parallel matrix B_mpi */
4380   /*---------------------------------------*/
4381   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4382   if (n==PETSC_DECIDE) {
4383     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4384   } else {
4385     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4386   }
4387   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4388   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4389   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4390 
4391   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4392   B_mpi->assembled     = PETSC_FALSE;
4393   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4394   merge->bi            = bi;
4395   merge->bj            = bj;
4396   merge->buf_ri        = buf_ri;
4397   merge->buf_rj        = buf_rj;
4398   merge->coi           = PETSC_NULL;
4399   merge->coj           = PETSC_NULL;
4400   merge->owners_co     = PETSC_NULL;
4401 
4402   /* attach the supporting struct to B_mpi for reuse */
4403   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4404   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4405   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4406   *mpimat = B_mpi;
4407 
4408   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4409   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4410   PetscFunctionReturn(0);
4411 }
4412 
4413 #undef __FUNCT__
4414 #define __FUNCT__ "MatMerge_SeqsToMPI"
4415 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4416 {
4417   PetscErrorCode   ierr;
4418 
4419   PetscFunctionBegin;
4420   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4421   if (scall == MAT_INITIAL_MATRIX){
4422     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4423   }
4424   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4425   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 #undef __FUNCT__
4430 #define __FUNCT__ "MatGetLocalMat"
4431 /*@
4432      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A
4433 
4434     Not Collective
4435 
4436    Input Parameters:
4437 +    A - the matrix
4438 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4439 
4440    Output Parameter:
4441 .    A_loc - the local sequential matrix generated
4442 
4443     Level: developer
4444 
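   Example usage (a minimal sketch, not part of the original manual page; A is assumed to be an
   assembled MPIAIJ matrix):

      Mat            A_loc;
      PetscErrorCode ierr;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... work with the SeqAIJ matrix A_loc ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);    (refresh the values after A changes)
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
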
4445 @*/
4446 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4447 {
4448   PetscErrorCode  ierr;
4449   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4450   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4451   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4452   MatScalar       *aa=a->a,*ba=b->a,*cam;
4453   PetscScalar     *ca;
4454   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4455   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4456 
4457   PetscFunctionBegin;
4458   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4459   if (scall == MAT_INITIAL_MATRIX){
4460     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4461     ci[0] = 0;
4462     for (i=0; i<am; i++){
4463       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4464     }
4465     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4466     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4467     k = 0;
4468     for (i=0; i<am; i++) {
4469       ncols_o = bi[i+1] - bi[i];
4470       ncols_d = ai[i+1] - ai[i];
4471       /* off-diagonal portion of A */
4472       for (jo=0; jo<ncols_o; jo++) {
4473         col = cmap[*bj];
4474         if (col >= cstart) break;
4475         cj[k]   = col; bj++;
4476         ca[k++] = *ba++;
4477       }
4478       /* diagonal portion of A */
4479       for (j=0; j<ncols_d; j++) {
4480         cj[k]   = cstart + *aj++;
4481         ca[k++] = *aa++;
4482       }
4483       /* off-diagonal portion of A */
4484       for (j=jo; j<ncols_o; j++) {
4485         cj[k]   = cmap[*bj++];
4486         ca[k++] = *ba++;
4487       }
4488     }
4489     /* put together the new matrix */
4490     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4491     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4492     /* Since these are PETSc arrays, change flags to free them as necessary. */
4493     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4494     mat->free_a  = PETSC_TRUE;
4495     mat->free_ij = PETSC_TRUE;
4496     mat->nonew   = 0;
4497   } else if (scall == MAT_REUSE_MATRIX){
4498     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4499     ci = mat->i; cj = mat->j; cam = mat->a;
4500     for (i=0; i<am; i++) {
4501       /* off-diagonal portion of A */
4502       ncols_o = bi[i+1] - bi[i];
4503       for (jo=0; jo<ncols_o; jo++) {
4504         col = cmap[*bj];
4505         if (col >= cstart) break;
4506         *cam++ = *ba++; bj++;
4507       }
4508       /* diagonal portion of A */
4509       ncols_d = ai[i+1] - ai[i];
4510       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4511       /* off-diagonal portion of A */
4512       for (j=jo; j<ncols_o; j++) {
4513         *cam++ = *ba++; bj++;
4514       }
4515     }
4516   } else {
4517     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4518   }
4519 
4520   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4521   PetscFunctionReturn(0);
4522 }
4523 
4524 #undef __FUNCT__
4525 #define __FUNCT__ "MatGetLocalMatCondensed"
4526 /*@C
4527      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only the NON-ZERO columns
4528 
4529     Not Collective
4530 
4531    Input Parameters:
4532 +    A - the matrix
4533 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4534 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4535 
4536    Output Parameter:
4537 .    A_loc - the local sequential matrix generated
4538 
4539     Level: developer
4540 
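   Example usage (a minimal sketch, not part of the original manual page; A is assumed to be an
   assembled MPIAIJ matrix and the default row and column index sets are used):

      Mat            A_loc;
      PetscErrorCode ierr;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ... A_loc holds the local rows of A restricted to the columns that contain nonzeros ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
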
4541 @*/
4542 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4543 {
4544   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4545   PetscErrorCode    ierr;
4546   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4547   IS                isrowa,iscola;
4548   Mat               *aloc;
4549 
4550   PetscFunctionBegin;
4551   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4552   if (!row){
4553     start = A->rmap->rstart; end = A->rmap->rend;
4554     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4555   } else {
4556     isrowa = *row;
4557   }
4558   if (!col){
4559     start = A->cmap->rstart;
4560     cmap  = a->garray;
4561     nzA   = a->A->cmap->n;
4562     nzB   = a->B->cmap->n;
4563     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4564     ncols = 0;
4565     for (i=0; i<nzB; i++) {
4566       if (cmap[i] < start) idx[ncols++] = cmap[i];
4567       else break;
4568     }
4569     imark = i;
4570     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4571     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4572     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4573     ierr = PetscFree(idx);CHKERRQ(ierr);
4574   } else {
4575     iscola = *col;
4576   }
4577   if (scall != MAT_INITIAL_MATRIX){
4578     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4579     aloc[0] = *A_loc;
4580   }
4581   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4582   *A_loc = aloc[0];
4583   ierr = PetscFree(aloc);CHKERRQ(ierr);
4584   if (!row){
4585     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4586   }
4587   if (!col){
4588     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4589   }
4590   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4591   PetscFunctionReturn(0);
4592 }
4593 
4594 #undef __FUNCT__
4595 #define __FUNCT__ "MatGetBrowsOfAcols"
4596 /*@C
4597     MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of local A
4598 
4599     Collective on Mat
4600 
4601    Input Parameters:
4602 +    A,B - the matrices in mpiaij format
4603 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4604 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4605 
4606    Output Parameter:
4607 +    rowb, colb - index sets of rows and columns of B to extract
4608 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4609 -    B_seq - the sequential matrix generated
4610 
4611     Level: developer
4612 
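   Example usage (a minimal sketch, not part of the original manual page; A and B are assumed to
   be MPIAIJ matrices with compatible local dimensions):

      Mat            B_seq;
      IS             rowb,colb;
      PetscInt       brstart;
      PetscErrorCode ierr;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... use B_seq; keep rowb and colb for a later call with MAT_REUSE_MATRIX ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
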
4613 @*/
4614 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4615 {
4616   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4617   PetscErrorCode    ierr;
4618   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4619   IS                isrowb,iscolb;
4620   Mat               *bseq;
4621 
4622   PetscFunctionBegin;
4623   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4624     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4625   }
4626   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4627 
4628   if (scall == MAT_INITIAL_MATRIX){
4629     start = A->cmap->rstart;
4630     cmap  = a->garray;
4631     nzA   = a->A->cmap->n;
4632     nzB   = a->B->cmap->n;
4633     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4634     ncols = 0;
4635     for (i=0; i<nzB; i++) {  /* row < local row index */
4636       if (cmap[i] < start) idx[ncols++] = cmap[i];
4637       else break;
4638     }
4639     imark = i;
4640     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4641     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4642     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4643     ierr = PetscFree(idx);CHKERRQ(ierr);
4644     *brstart = imark;
4645     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4646   } else {
4647     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4648     isrowb = *rowb; iscolb = *colb;
4649     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4650     bseq[0] = *B_seq;
4651   }
4652   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4653   *B_seq = bseq[0];
4654   ierr = PetscFree(bseq);CHKERRQ(ierr);
4655   if (!rowb){
4656     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4657   } else {
4658     *rowb = isrowb;
4659   }
4660   if (!colb){
4661     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4662   } else {
4663     *colb = iscolb;
4664   }
4665   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4666   PetscFunctionReturn(0);
4667 }
4668 
4669 #undef __FUNCT__
4670 #define __FUNCT__ "MatGetBrowsOfAoCols"
4671 /*@C
4672     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns
4673     of the OFF-DIAGONAL portion of local A
4674 
4675     Collective on Mat
4676 
4677    Input Parameters:
4678 +    A,B - the matrices in mpiaij format
4679 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4680 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4681 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4682 
4683    Output Parameter:
4684 .    B_oth - the sequential matrix generated
4685 
4686     Level: developer
4687 
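   Example usage (a minimal sketch, not part of the original manual page; A and B are assumed to
   be MPIAIJ matrices with compatible local dimensions):

      Mat            B_oth;
      PetscInt       *startsj = PETSC_NULL;
      MatScalar      *bufa    = PETSC_NULL;
      PetscErrorCode ierr;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ... use B_oth; startsj and bufa are kept so the call can be repeated ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
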
4688 @*/
4689 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4690 {
4691   VecScatter_MPI_General *gen_to,*gen_from;
4692   PetscErrorCode         ierr;
4693   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4694   Mat_SeqAIJ             *b_oth;
4695   VecScatter             ctx=a->Mvctx;
4696   MPI_Comm               comm=((PetscObject)ctx)->comm;
4697   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4698   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4699   PetscScalar            *rvalues,*svalues;
4700   MatScalar              *b_otha,*bufa,*bufA;
4701   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4702   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4703   MPI_Status             *sstatus,rstatus;
4704   PetscMPIInt            jj;
4705   PetscInt               *cols,sbs,rbs;
4706   PetscScalar            *vals;
4707 
4708   PetscFunctionBegin;
4709   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4710     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4711   }
4712   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4713   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4714 
4715   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4716   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4717   rvalues  = gen_from->values; /* holds the length of receiving row */
4718   svalues  = gen_to->values;   /* holds the length of sending row */
4719   nrecvs   = gen_from->n;
4720   nsends   = gen_to->n;
4721 
4722   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4723   srow     = gen_to->indices;   /* local row index to be sent */
4724   sstarts  = gen_to->starts;
4725   sprocs   = gen_to->procs;
4726   sstatus  = gen_to->sstatus;
4727   sbs      = gen_to->bs;
4728   rstarts  = gen_from->starts;
4729   rprocs   = gen_from->procs;
4730   rbs      = gen_from->bs;
4731 
4732   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4733   if (scall == MAT_INITIAL_MATRIX){
4734     /* i-array */
4735     /*---------*/
4736     /*  post receives */
4737     for (i=0; i<nrecvs; i++){
4738       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4739       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4740       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4741     }
4742 
4743     /* pack the outgoing message */
4744     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4745     rstartsj = sstartsj + nsends +1;
4746     sstartsj[0] = 0;  rstartsj[0] = 0;
4747     len = 0; /* total length of j or a array to be sent */
4748     k = 0;
4749     for (i=0; i<nsends; i++){
4750       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4751       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4752       for (j=0; j<nrows; j++) {
4753         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4754         for (l=0; l<sbs; l++){
4755           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4756           rowlen[j*sbs+l] = ncols;
4757           len += ncols;
4758           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4759         }
4760         k++;
4761       }
4762       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4763       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4764     }
4765     /* recvs and sends of i-array are completed */
4766     i = nrecvs;
4767     while (i--) {
4768       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4769     }
4770     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4771 
4772     /* allocate buffers for sending j and a arrays */
4773     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4774     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4775 
4776     /* create i-array of B_oth */
4777     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4778     b_othi[0] = 0;
4779     len = 0; /* total length of j or a array to be received */
4780     k = 0;
4781     for (i=0; i<nrecvs; i++){
4782       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4783       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4784       for (j=0; j<nrows; j++) {
4785         b_othi[k+1] = b_othi[k] + rowlen[j];
4786         len += rowlen[j]; k++;
4787       }
4788       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4789     }
4790 
4791     /* allocate space for j and a arrays of B_oth */
4792     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4793     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4794 
4795     /* j-array */
4796     /*---------*/
4797     /*  post receives of j-array */
4798     for (i=0; i<nrecvs; i++){
4799       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4800       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4801     }
4802 
4803     /* pack the outgoing message j-array */
4804     k = 0;
4805     for (i=0; i<nsends; i++){
4806       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4807       bufJ = bufj+sstartsj[i];
4808       for (j=0; j<nrows; j++) {
4809         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4810         for (ll=0; ll<sbs; ll++){
4811           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4812           for (l=0; l<ncols; l++){
4813             *bufJ++ = cols[l];
4814           }
4815           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4816         }
4817       }
4818       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4819     }
4820 
4821     /* recvs and sends of j-array are completed */
4822     i = nrecvs;
4823     while (i--) {
4824       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4825     }
4826     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4827   } else if (scall == MAT_REUSE_MATRIX){
4828     sstartsj = *startsj;
4829     rstartsj = sstartsj + nsends +1;
4830     bufa     = *bufa_ptr;
4831     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4832     b_otha   = b_oth->a;
4833   } else {
4834     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4835   }
4836 
4837   /* a-array */
4838   /*---------*/
4839   /*  post receives of a-array */
4840   for (i=0; i<nrecvs; i++){
4841     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4842     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4843   }
4844 
4845   /* pack the outgoing message a-array */
4846   k = 0;
4847   for (i=0; i<nsends; i++){
4848     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4849     bufA = bufa+sstartsj[i];
4850     for (j=0; j<nrows; j++) {
4851       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4852       for (ll=0; ll<sbs; ll++){
4853         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4854         for (l=0; l<ncols; l++){
4855           *bufA++ = vals[l];
4856         }
4857         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4858       }
4859     }
4860     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4861   }
4862   /* recvs and sends of a-array are completed */
4863   i = nrecvs;
4864   while (i--) {
4865     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4866   }
4867   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4868   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4869 
4870   if (scall == MAT_INITIAL_MATRIX){
4871     /* put together the new matrix */
4872     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4873 
4874     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4875     /* Since these are PETSc arrays, change flags to free them as necessary. */
4876     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4877     b_oth->free_a  = PETSC_TRUE;
4878     b_oth->free_ij = PETSC_TRUE;
4879     b_oth->nonew   = 0;
4880 
4881     ierr = PetscFree(bufj);CHKERRQ(ierr);
4882     if (!startsj || !bufa_ptr){
4883       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4884       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4885     } else {
4886       *startsj  = sstartsj;
4887       *bufa_ptr = bufa;
4888     }
4889   }
4890   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4891   PetscFunctionReturn(0);
4892 }
4893 
4894 #undef __FUNCT__
4895 #define __FUNCT__ "MatGetCommunicationStructs"
4896 /*@C
4897   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4898 
4899   Not Collective
4900 
4901   Input Parameters:
4902 . A - The matrix in mpiaij format
4903 
4904   Output Parameter:
4905 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4906 . colmap - A map from global column index to local index into lvec
4907 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4908 
4909   Level: developer
4910 
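  Example usage (a minimal sketch, not part of the original manual page; A is assumed to be an
  assembled MPIAIJ matrix, and the returned objects are internal references that the caller
  must not destroy):

     Vec            lvec;
     VecScatter     Mvctx;
     PetscErrorCode ierr;
  #if defined(PETSC_USE_CTABLE)
     PetscTable     colmap;
  #else
     PetscInt       *colmap;
  #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
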
4911 @*/
4912 #if defined (PETSC_USE_CTABLE)
4913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4914 #else
4915 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4916 #endif
4917 {
4918   Mat_MPIAIJ *a;
4919 
4920   PetscFunctionBegin;
4921   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4922   PetscValidPointer(lvec, 2);
4923   PetscValidPointer(colmap, 3);
4924   PetscValidPointer(multScatter, 4);
4925   a = (Mat_MPIAIJ *) A->data;
4926   if (lvec) *lvec = a->lvec;
4927   if (colmap) *colmap = a->colmap;
4928   if (multScatter) *multScatter = a->Mvctx;
4929   PetscFunctionReturn(0);
4930 }
4931 
4932 EXTERN_C_BEGIN
4933 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4934 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4935 EXTERN_C_END
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4939 /*
4940     Computes (B'*A')' since computing B*A directly is untenable
4941 
4942                n                       p                          p
4943         (              )       (              )         (                  )
4944       m (      A       )  *  n (       B      )   =   m (         C        )
4945         (              )       (              )         (                  )
4946 
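    The transposes turn the (MPIDense)*(MPIAIJ) product into an (MPIAIJ)*(MPIDense) product
    that the MatMatMult() call below can form, and a final transpose of Ct recovers C = A*B
    (explanatory note, not part of the original comment).
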
4947 */
4948 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4949 {
4950   PetscErrorCode     ierr;
4951   Mat                At,Bt,Ct;
4952 
4953   PetscFunctionBegin;
4954   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4955   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4956   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4957   ierr = MatDestroy(At);CHKERRQ(ierr);
4958   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4959   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4960   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4961   PetscFunctionReturn(0);
4962 }
4963 
4964 #undef __FUNCT__
4965 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4966 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4967 {
4968   PetscErrorCode ierr;
4969   PetscInt       m=A->rmap->n,n=B->cmap->n;
4970   Mat            Cmat;
4971 
4972   PetscFunctionBegin;
4973   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4974   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4975   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4976   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4977   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4978   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4979   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4980   *C   = Cmat;
4981   PetscFunctionReturn(0);
4982 }
4983 
4984 /* ----------------------------------------------------------------*/
4985 #undef __FUNCT__
4986 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4987 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4988 {
4989   PetscErrorCode ierr;
4990 
4991   PetscFunctionBegin;
4992   if (scall == MAT_INITIAL_MATRIX){
4993     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4994   }
4995   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4996   PetscFunctionReturn(0);
4997 }
4998 
4999 EXTERN_C_BEGIN
5000 #if defined(PETSC_HAVE_MUMPS)
5001 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5002 #endif
5003 #if defined(PETSC_HAVE_PASTIX)
5004 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5005 #endif
5006 #if defined(PETSC_HAVE_SUPERLU_DIST)
5007 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5008 #endif
5009 #if defined(PETSC_HAVE_SPOOLES)
5010 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5011 #endif
5012 EXTERN_C_END
5013 
5014 /*MC
5015    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5016 
5017    Options Database Keys:
5018 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5019 
5020   Level: beginner
5021 
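  Example usage (an illustrative sketch, not part of the original manual page):

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... set entries with MatSetValues() and assemble with MatAssemblyBegin()/MatAssemblyEnd() ...
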
5022 .seealso: MatCreateMPIAIJ()
5023 M*/
5024 
5025 EXTERN_C_BEGIN
5026 #undef __FUNCT__
5027 #define __FUNCT__ "MatCreate_MPIAIJ"
5028 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5029 {
5030   Mat_MPIAIJ     *b;
5031   PetscErrorCode ierr;
5032   PetscMPIInt    size;
5033 
5034   PetscFunctionBegin;
5035   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5036 
5037   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5038   B->data         = (void*)b;
5039   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5040   B->rmap->bs     = 1;
5041   B->assembled    = PETSC_FALSE;
5042   B->mapping      = 0;
5043 
5044   B->insertmode   = NOT_SET_VALUES;
5045   b->size         = size;
5046   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5047 
5048   /* build cache for off array entries formed */
5049   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5050   b->donotstash  = PETSC_FALSE;
5051   b->colmap      = 0;
5052   b->garray      = 0;
5053   b->roworiented = PETSC_TRUE;
5054 
5055   /* stuff used for matrix vector multiply */
5056   b->lvec      = PETSC_NULL;
5057   b->Mvctx     = PETSC_NULL;
5058 
5059   /* stuff for MatGetRow() */
5060   b->rowindices   = 0;
5061   b->rowvalues    = 0;
5062   b->getrowactive = PETSC_FALSE;
5063 
5064 #if defined(PETSC_HAVE_SPOOLES)
5065   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5066                                      "MatGetFactor_mpiaij_spooles",
5067                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5068 #endif
5069 #if defined(PETSC_HAVE_MUMPS)
5070   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5071                                      "MatGetFactor_mpiaij_mumps",
5072                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5073 #endif
5074 #if defined(PETSC_HAVE_PASTIX)
5075   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5076 					   "MatGetFactor_mpiaij_pastix",
5077 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5078 #endif
5079 #if defined(PETSC_HAVE_SUPERLU_DIST)
5080   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5081                                      "MatGetFactor_mpiaij_superlu_dist",
5082                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5083 #endif
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5085                                      "MatStoreValues_MPIAIJ",
5086                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5088                                      "MatRetrieveValues_MPIAIJ",
5089                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5091 				     "MatGetDiagonalBlock_MPIAIJ",
5092                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5094 				     "MatIsTranspose_MPIAIJ",
5095 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5097 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5098 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5100 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5101 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5103 				     "MatDiagonalScaleLocal_MPIAIJ",
5104 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5106                                      "MatConvert_MPIAIJ_MPICSRPERM",
5107                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5109                                      "MatConvert_MPIAIJ_MPICRL",
5110                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5112                                      "MatConvert_MPIAIJ_MPISBAIJ",
5113                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5115                                      "MatMatMult_MPIDense_MPIAIJ",
5116                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5118                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5119                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5120   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5121                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5122                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5123   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5124   PetscFunctionReturn(0);
5125 }
5126 EXTERN_C_END
5127 
5128 #undef __FUNCT__
5129 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5130 /*@
5131      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5132          and "off-diagonal" part of the matrix in CSR format.
5133 
5134    Collective on MPI_Comm
5135 
5136    Input Parameters:
5137 +  comm - MPI communicator
5138 .  m - number of local rows (Cannot be PETSC_DECIDE)
5139 .  n - This value should be the same as the local size used in creating the
5140        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5141        calculated if N is given). For square matrices n is almost always m.
5142 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5143 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5144 .   i - row indices for "diagonal" portion of matrix
5145 .   j - column indices
5146 .   a - matrix values
5147 .   oi - row indices for "off-diagonal" portion of matrix
5148 .   oj - column indices
5149 -   oa - matrix values
5150 
5151    Output Parameter:
5152 .   mat - the matrix
5153 
5154    Level: advanced
5155 
5156    Notes:
5157        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5158 
5159        The i and j indices are 0 based
5160 
5161        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5162 
5163        This sets local rows and cannot be used to set off-processor values.
5164 
5165        You cannot later use MatSetValues() to change values in this matrix.
5166 
5167 .keywords: matrix, aij, compressed row, sparse, parallel
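   Example usage (an illustrative sketch, not part of the original manual page; di,dj,da and
   oi,oj,oa are placeholder names for caller-built CSR arrays describing the "diagonal" and
   "off-diagonal" blocks of the m local rows):

      Mat            A;
      PetscErrorCode ierr;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
      ... the six arrays must remain valid while A exists; they are not freed when A is destroyed ...
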
5168 
5169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5170           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5171 @*/
5172 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5173 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5174 {
5175   PetscErrorCode ierr;
5176   Mat_MPIAIJ     *maij;
5177 
5178   PetscFunctionBegin;
5179   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5180   if (i[0]) {
5181     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5182   }
5183   if (oi[0]) {
5184     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5185   }
5186   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5187   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5188   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5189   maij = (Mat_MPIAIJ*) (*mat)->data;
5190   maij->donotstash     = PETSC_TRUE;
5191   (*mat)->preallocated = PETSC_TRUE;
5192 
5193   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5194   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5195   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5196   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5197 
5198   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5199   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5200 
5201   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205 
5206   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5207   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5208   PetscFunctionReturn(0);
5209 }
5210 
5211 /*
5212     Special version for direct calls from Fortran
5213 */
5214 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5215 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5216 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5217 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5218 #endif
5219 
5220 /* Change these macros so can be used in void function */
5221 #undef CHKERRQ
5222 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5223 #undef SETERRQ2
5224 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5225 #undef SETERRQ
5226 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5227 
5228 EXTERN_C_BEGIN
5229 #undef __FUNCT__
5230 #define __FUNCT__ "matsetvaluesmpiaij_"
5231 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5232 {
5233   Mat             mat = *mmat;
5234   PetscInt        m = *mm, n = *mn;
5235   InsertMode      addv = *maddv;
5236   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5237   PetscScalar     value;
5238   PetscErrorCode  ierr;
5239 
5240   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5241   if (mat->insertmode == NOT_SET_VALUES) {
5242     mat->insertmode = addv;
5243   }
5244 #if defined(PETSC_USE_DEBUG)
5245   else if (mat->insertmode != addv) {
5246     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5247   }
5248 #endif
5249   {
5250   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5251   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5252   PetscTruth      roworiented = aij->roworiented;
5253 
5254   /* Some Variables required in the macro */
5255   Mat             A = aij->A;
5256   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5257   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5258   MatScalar       *aa = a->a;
5259   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5260   Mat             B = aij->B;
5261   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5262   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5263   MatScalar       *ba = b->a;
5264 
5265   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5266   PetscInt        nonew = a->nonew;
5267   MatScalar       *ap1,*ap2;
5268 
5269   PetscFunctionBegin;
5270   for (i=0; i<m; i++) {
5271     if (im[i] < 0) continue;
5272 #if defined(PETSC_USE_DEBUG)
5273     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5274 #endif
5275     if (im[i] >= rstart && im[i] < rend) {
5276       row      = im[i] - rstart;
5277       lastcol1 = -1;
5278       rp1      = aj + ai[row];
5279       ap1      = aa + ai[row];
5280       rmax1    = aimax[row];
5281       nrow1    = ailen[row];
5282       low1     = 0;
5283       high1    = nrow1;
5284       lastcol2 = -1;
5285       rp2      = bj + bi[row];
5286       ap2      = ba + bi[row];
5287       rmax2    = bimax[row];
5288       nrow2    = bilen[row];
5289       low2     = 0;
5290       high2    = nrow2;
5291 
5292       for (j=0; j<n; j++) {
5293         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5294         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5295         if (in[j] >= cstart && in[j] < cend){
5296           col = in[j] - cstart;
5297           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5298         } else if (in[j] < 0) continue;
5299 #if defined(PETSC_USE_DEBUG)
5300         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5301 #endif
5302         else {
5303           if (mat->was_assembled) {
5304             if (!aij->colmap) {
5305               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5306             }
5307 #if defined (PETSC_USE_CTABLE)
5308             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5309 	    col--;
5310 #else
5311             col = aij->colmap[in[j]] - 1;
5312 #endif
5313             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5314               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5315               col =  in[j];
5316               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5317               B = aij->B;
5318               b = (Mat_SeqAIJ*)B->data;
5319               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5320               rp2      = bj + bi[row];
5321               ap2      = ba + bi[row];
5322               rmax2    = bimax[row];
5323               nrow2    = bilen[row];
5324               low2     = 0;
5325               high2    = nrow2;
5326               bm       = aij->B->rmap->n;
5327               ba = b->a;
5328             }
5329           } else col = in[j];
5330           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5331         }
5332       }
5333     } else {
5334       if (!aij->donotstash) {
5335         if (roworiented) {
5336           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5337         } else {
5338           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5339         }
5340       }
5341     }
5342   }}
5343   PetscFunctionReturnVoid();
5344 }
5345 EXTERN_C_END
5346 
5347