xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision eeffb40d691afbdd57a8091619e7ddd44ac5fdca)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the number of diagonal and off-diagonal nonzeros in each row */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the number of diagonal and off-diagonal nonzeros in each row */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
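/*
   Illustrative calling sketch (not compiled here).  Assumptions: gmat is a
   MATSEQAIJ matrix that need only be valid on process 0 (the routine only
   dereferences it there) and m is the number of rows this process is to own.

      Mat      gmat;       sequential matrix assembled on process 0
      Mat      dist;       resulting parallel AIJ matrix
      PetscInt m;          local row count for this process

      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
        ... later, after only the numerical values of gmat change on process 0 ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/
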
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
179 a slightly higher hash table cost; without it, it is not scalable (each process
180 holds an order-N integer array) but access is fast.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
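/*
   Sketch of how the colmap built above is consulted later in this file
   (see MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()); entries are stored
   as local index + 1 so that 0 means "global column not present in the
   off-diagonal part":

      PetscInt gcol = ...;                  a global column index
      PetscInt lcol;
      #if defined (PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
        lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif
      if (lcol < 0)  the column has no entries in the off-diagonal block B
*/
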
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
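/*
   Usage note for the routine above: the caller passes, for one locally owned
   global row, all stored values ordered by increasing global column (for the
   square matrices this routine supports).  A hedged sketch:

      PetscInt    grow = ...;     a global row owned by this process
      PetscScalar v[]  = {...};   values of that row, sorted by global column
      ierr = MatSetValuesRow(A,grow,v);CHKERRQ(ierr);

   The code above then splits v into the off-diagonal entries left of the
   diagonal block, the diagonal block entries, and the remaining off-diagonal
   entries.
*/
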
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some variables required by the macros above */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
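/*
   Typical application-level use of the routine above (a sketch): any process
   may set any entries; rows not owned locally are stashed here and moved to
   their owners during assembly.

      PetscInt    row = ...,col = ...;      global indices
      PetscScalar val = ...;
      ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
        ... repeat for all entries this process generates ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
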
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processors are either in INSERTMODE or ADDMODE */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) {
468     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   }
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled, if so we must
516      also disassemble ourselves, in order that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of number of messages and max length*/
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called for each diagonal entry, which
675      would allocate new memory, triggering many mallocs and slowing things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
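/*
   User-level sketch for the routine above: each process lists the global rows
   it wants zeroed (they need not be locally owned; they are routed to their
   owners by the sends/receives above).

      PetscInt rows[] = {...};                       global row indices
      ierr = MatZeroRows(A,nrows,rows,1.0);CHKERRQ(ierr);

   The final argument is placed on the diagonal of each zeroed row; passing
   0.0 simply zeroes the existing entries.
*/
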
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
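/*
   Minimal matrix-vector product sketch; MatGetVecs() returns vectors whose
   layouts match the columns (x) and rows (y) of A, which is exactly the
   compatibility checked at the top of the routine above.

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);          y = A*x
      ierr = VecDestroy(x);CHKERRQ(ierr);
      ierr = VecDestroy(y);CHKERRQ(ierr);
*/
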
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added into yy until the VecScatterEnd() on the next line */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "MatView_MPIAIJ_Binary"
926 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
927 {
928   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
929   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
930   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
931   PetscErrorCode    ierr;
932   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
933   int               fd;
934   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
935   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
936   PetscScalar       *column_values;
937 
938   PetscFunctionBegin;
939   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
940   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
941   nz   = A->nz + B->nz;
942   if (!rank) {
943     header[0] = MAT_FILE_COOKIE;
944     header[1] = mat->rmap->N;
945     header[2] = mat->cmap->N;
946     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
947     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
948     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
949     /* get largest number of rows any processor has */
950     rlen = mat->rmap->n;
951     range = mat->rmap->range;
952     for (i=1; i<size; i++) {
953       rlen = PetscMax(rlen,range[i+1] - range[i]);
954     }
955   } else {
956     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
957     rlen = mat->rmap->n;
958   }
959 
960   /* load up the local row counts */
961   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
962   for (i=0; i<mat->rmap->n; i++) {
963     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
964   }
965 
966   /* store the row lengths to the file */
967   if (!rank) {
968     MPI_Status status;
969     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
970     for (i=1; i<size; i++) {
971       rlen = range[i+1] - range[i];
972       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
973       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
974     }
975   } else {
976     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
979 
980   /* load up the local column indices */
981   nzmax = nz; /* process 0 needs a buffer as large as the largest local nonzero count on any process */
982   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
983   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
984   cnt  = 0;
985   for (i=0; i<mat->rmap->n; i++) {
986     for (j=B->i[i]; j<B->i[i+1]; j++) {
987       if ( (col = garray[B->j[j]]) > cstart) break;
988       column_indices[cnt++] = col;
989     }
990     for (k=A->i[i]; k<A->i[i+1]; k++) {
991       column_indices[cnt++] = A->j[k] + cstart;
992     }
993     for (; j<B->i[i+1]; j++) {
994       column_indices[cnt++] = garray[B->j[j]];
995     }
996   }
997   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
998 
999   /* store the column indices to the file */
1000   if (!rank) {
1001     MPI_Status status;
1002     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1003     for (i=1; i<size; i++) {
1004       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1005       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1006       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1007       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1008     }
1009   } else {
1010     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012   }
1013   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1014 
1015   /* load up the local column values */
1016   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1017   cnt  = 0;
1018   for (i=0; i<mat->rmap->n; i++) {
1019     for (j=B->i[i]; j<B->i[i+1]; j++) {
1020       if ( garray[B->j[j]] > cstart) break;
1021       column_values[cnt++] = B->a[j];
1022     }
1023     for (k=A->i[i]; k<A->i[i+1]; k++) {
1024       column_values[cnt++] = A->a[k];
1025     }
1026     for (; j<B->i[i+1]; j++) {
1027       column_values[cnt++] = B->a[j];
1028     }
1029   }
1030   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1031 
1032   /* store the column values to the file */
1033   if (!rank) {
1034     MPI_Status status;
1035     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1036     for (i=1; i<size; i++) {
1037       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1038       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1039       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1040       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1041     }
1042   } else {
1043     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1044     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045   }
1046   ierr = PetscFree(column_values);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
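/*
   Sketch of how the binary dump above is normally reached from user code
   (MatView() dispatches here when given a binary viewer); assumes the matrix
   lives on PETSC_COMM_WORLD.

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
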
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1052 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1053 {
1054   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1055   PetscErrorCode    ierr;
1056   PetscMPIInt       rank = aij->rank,size = aij->size;
1057   PetscTruth        isdraw,iascii,isbinary;
1058   PetscViewer       sviewer;
1059   PetscViewerFormat format;
1060 
1061   PetscFunctionBegin;
1062   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1065   if (iascii) {
1066     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1067     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1068       MatInfo    info;
1069       PetscTruth inodes;
1070 
1071       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1072       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1073       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1074       if (!inodes) {
1075         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1076 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1077       } else {
1078         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1079 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1080       }
1081       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1082       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1083       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1085       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1087       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1088       PetscFunctionReturn(0);
1089     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1090       PetscInt   inodecount,inodelimit,*inodes;
1091       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1092       if (inodes) {
1093         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1094       } else {
1095         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1096       }
1097       PetscFunctionReturn(0);
1098     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1099       PetscFunctionReturn(0);
1100     }
1101   } else if (isbinary) {
1102     if (size == 1) {
1103       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1104       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1105     } else {
1106       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1107     }
1108     PetscFunctionReturn(0);
1109   } else if (isdraw) {
1110     PetscDraw  draw;
1111     PetscTruth isnull;
1112     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1113     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1114   }
1115 
1116   if (size == 1) {
1117     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1118     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1119   } else {
1120     /* assemble the entire matrix onto first processor. */
1121     Mat         A;
1122     Mat_SeqAIJ  *Aloc;
1123     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1124     MatScalar   *a;
1125 
1126     if (mat->rmap->N > 1024) {
1127       PetscTruth flg = PETSC_FALSE;
1128 
1129       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1130       if (!flg) {
1131         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1132       }
1133     }
1134 
1135     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1136     if (!rank) {
1137       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1138     } else {
1139       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1140     }
1141     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1142     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1143     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1144     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1145 
1146     /* copy over the A part */
1147     Aloc = (Mat_SeqAIJ*)aij->A->data;
1148     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1149     row = mat->rmap->rstart;
1150     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1151     for (i=0; i<m; i++) {
1152       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1153       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1154     }
1155     aj = Aloc->j;
1156     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1157 
1158     /* copy over the B part */
1159     Aloc = (Mat_SeqAIJ*)aij->B->data;
1160     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1161     row  = mat->rmap->rstart;
1162     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1163     ct   = cols;
1164     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1165     for (i=0; i<m; i++) {
1166       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1167       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1168     }
1169     ierr = PetscFree(ct);CHKERRQ(ierr);
1170     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1171     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     /*
1173        Everyone has to call to draw the matrix since the graphics waits are
1174        synchronized across all processors that share the PetscDraw object
1175     */
1176     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1177     if (!rank) {
1178       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1179       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1182     ierr = MatDestroy(A);CHKERRQ(ierr);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatView_MPIAIJ"
1189 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1190 {
1191   PetscErrorCode ierr;
1192   PetscTruth     iascii,isdraw,issocket,isbinary;
1193 
1194   PetscFunctionBegin;
1195   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1197   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1199   if (iascii || isdraw || isbinary || issocket) {
1200     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1201   } else {
1202     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
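/*
   Sketch of requesting the summary output handled by the
   PETSC_VIEWER_ASCII_INFO branches above instead of printing every entry:

      ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
      ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
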
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatSOR_MPIAIJ"
1209 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1210 {
1211   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1212   PetscErrorCode ierr;
1213   Vec            bb1 = 0;
1214   PetscTruth     hasop;
1215 
1216   PetscFunctionBegin;
1217   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1218     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1219   }
1220 
1221   if (flag == SOR_APPLY_UPPER) {
1222     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223     PetscFunctionReturn(0);
1224   }
1225 
1226   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1227     if (flag & SOR_ZERO_INITIAL_GUESS) {
1228       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1229       its--;
1230     }
1231 
1232     while (its--) {
1233       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235 
1236       /* update rhs: bb1 = bb - B*x */
1237       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1238       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1239 
1240       /* local sweep */
1241       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1242     }
1243   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1244     if (flag & SOR_ZERO_INITIAL_GUESS) {
1245       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1246       its--;
1247     }
1248     while (its--) {
1249       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251 
1252       /* update rhs: bb1 = bb - B*x */
1253       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1254       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1255 
1256       /* local sweep */
1257       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1258     }
1259   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1260     if (flag & SOR_ZERO_INITIAL_GUESS) {
1261       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1262       its--;
1263     }
1264     while (its--) {
1265       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267 
1268       /* update rhs: bb1 = bb - B*x */
1269       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1270       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1271 
1272       /* local sweep */
1273       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1274     }
1275   }  else if (flag & SOR_EISENSTAT) {
1276     Vec         xx1;
1277 
1278     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1279     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1280 
1281     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     if (!mat->diag) {
1284       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1285       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1286     }
1287     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1288     if (hasop) {
1289       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1290     } else {
1291       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1292     }
1293     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1294 
1295     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1296 
1297     /* local sweep */
1298     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1299     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1300     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1301   } else {
1302     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1303   }
1304 
1305   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatPermute_MPIAIJ"
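/*
    Permutes an MPIAIJ matrix by rows and columns. In outline: the row permutation is
    made collective, gathered onto every process and inverted; each process then takes
    the slice of the inverted permutation describing its own rows. The column
    permutation is handled locally and inverted. The permuted matrix is obtained as a
    submatrix with these row/column index sets.
*/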
1311 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1312 {
1313   MPI_Comm       comm,pcomm;
1314   PetscInt       first,local_size,nrows;
1315   const PetscInt *rows;
1316   PetscMPIInt    size;
1317   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1322   /* make a collective version of 'rowp' */
1323   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1324   if (pcomm==comm) {
1325     crowp = rowp;
1326   } else {
1327     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1328     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1329     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1330     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1331   }
1332   /* collect the global row permutation and invert it */
1333   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1334   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1335   if (pcomm!=comm) {
1336     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1337   }
1338   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1339   /* get the local target indices */
1340   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1341   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1343   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1344   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1345   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1346   /* the column permutation is so much easier;
1347      make a local version of 'colp' and invert it */
1348   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1349   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1350   if (size==1) {
1351     lcolp = colp;
1352   } else {
1353     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1354     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1355     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1356   }
1357   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1358   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1359   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1360   if (size>1) {
1361     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1362     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1363   }
1364   /* now we just get the submatrix */
1365   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1366   /* clean up */
1367   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1368   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatGetInfo_MPIAIJ"
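/*
    Gathers matrix information by summing the statistics of the local diagonal (A) and
    off-diagonal (B) blocks; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the per-process
    totals are combined with an MPI_Allreduce using MPI_MAX or MPI_SUM respectively.
*/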
1374 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1375 {
1376   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1377   Mat            A = mat->A,B = mat->B;
1378   PetscErrorCode ierr;
1379   PetscReal      isend[5],irecv[5];
1380 
1381   PetscFunctionBegin;
1382   info->block_size     = 1.0;
1383   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1384   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1385   isend[3] = info->memory;  isend[4] = info->mallocs;
1386   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1387   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1388   isend[3] += info->memory;  isend[4] += info->mallocs;
1389   if (flag == MAT_LOCAL) {
1390     info->nz_used      = isend[0];
1391     info->nz_allocated = isend[1];
1392     info->nz_unneeded  = isend[2];
1393     info->memory       = isend[3];
1394     info->mallocs      = isend[4];
1395   } else if (flag == MAT_GLOBAL_MAX) {
1396     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1397     info->nz_used      = irecv[0];
1398     info->nz_allocated = irecv[1];
1399     info->nz_unneeded  = irecv[2];
1400     info->memory       = irecv[3];
1401     info->mallocs      = irecv[4];
1402   } else if (flag == MAT_GLOBAL_SUM) {
1403     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1404     info->nz_used      = irecv[0];
1405     info->nz_allocated = irecv[1];
1406     info->nz_unneeded  = irecv[2];
1407     info->memory       = irecv[3];
1408     info->mallocs      = irecv[4];
1409   }
1410   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1411   info->fill_ratio_needed = 0;
1412   info->factor_mallocs    = 0;
1413 
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSetOption_MPIAIJ"
1419 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1420 {
1421   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   switch (op) {
1426   case MAT_NEW_NONZERO_LOCATIONS:
1427   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1428   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1429   case MAT_KEEP_NONZERO_PATTERN:
1430   case MAT_NEW_NONZERO_LOCATION_ERR:
1431   case MAT_USE_INODES:
1432   case MAT_IGNORE_ZERO_ENTRIES:
1433     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1434     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1435     break;
1436   case MAT_ROW_ORIENTED:
1437     a->roworiented = flg;
1438     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1439     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1440     break;
1441   case MAT_NEW_DIAGONALS:
1442     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1443     break;
1444   case MAT_IGNORE_OFF_PROC_ENTRIES:
1445     a->donotstash = PETSC_TRUE;
1446     break;
1447   case MAT_SYMMETRIC:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   case MAT_STRUCTURALLY_SYMMETRIC:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   case MAT_HERMITIAN:
1454     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1455     break;
1456   case MAT_SYMMETRY_ETERNAL:
1457     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1458     break;
1459   default:
1460     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1461   }
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatGetRow_MPIAIJ"
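/*
    Returns one locally owned row in global numbering. The row is assembled by merging
    the corresponding rows of the diagonal block A (local column indices shifted by
    cstart) and the off-diagonal block B (local column indices mapped to global ones
    through garray), keeping the result sorted by increasing global column, assuming
    both blocks store their rows sorted.
*/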
1467 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1468 {
1469   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1470   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1471   PetscErrorCode ierr;
1472   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1473   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1474   PetscInt       *cmap,*idx_p;
1475 
1476   PetscFunctionBegin;
1477   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1478   mat->getrowactive = PETSC_TRUE;
1479 
1480   if (!mat->rowvalues && (idx || v)) {
1481     /*
1482         allocate enough space to hold information from the longest row.
1483     */
1484     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1485     PetscInt     max = 1,tmp;
1486     for (i=0; i<matin->rmap->n; i++) {
1487       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1488       if (max < tmp) { max = tmp; }
1489     }
1490     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1491     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1492   }
1493 
1494   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1495   lrow = row - rstart;
1496 
1497   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1498   if (!v)   {pvA = 0; pvB = 0;}
1499   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1500   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1501   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1502   nztot = nzA + nzB;
1503 
1504   cmap  = mat->garray;
1505   if (v  || idx) {
1506     if (nztot) {
1507       /* Sort by increasing column numbers, assuming A and B already sorted */
1508       PetscInt imark = -1;
1509       if (v) {
1510         *v = v_p = mat->rowvalues;
1511         for (i=0; i<nzB; i++) {
1512           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1513           else break;
1514         }
1515         imark = i;
1516         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1517         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1518       }
1519       if (idx) {
1520         *idx = idx_p = mat->rowindices;
1521         if (imark > -1) {
1522           for (i=0; i<imark; i++) {
1523             idx_p[i] = cmap[cworkB[i]];
1524           }
1525         } else {
1526           for (i=0; i<nzB; i++) {
1527             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1528             else break;
1529           }
1530           imark = i;
1531         }
1532         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1533         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1534       }
1535     } else {
1536       if (idx) *idx = 0;
1537       if (v)   *v   = 0;
1538     }
1539   }
1540   *nz = nztot;
1541   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1542   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!aij->getrowactive) {
1554     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1555   }
1556   aij->getrowactive = PETSC_FALSE;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatNorm_MPIAIJ"
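/*
    Computes matrix norms from the two local blocks:
      NORM_FROBENIUS - sum of |a_ij|^2 over A and B, MPI_SUM reduction, then sqrt;
      NORM_1         - per-column sums of |a_ij| accumulated into a global-length work
                       array, MPI_SUM reduction, then the maximum entry;
      NORM_INFINITY  - per-row sums of |a_ij| over A and B, then an MPI_MAX reduction.
    The two norm is not supported in parallel.
*/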
1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1563 {
1564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1565   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,cstart = mat->cmap->rstart;
1568   PetscReal      sum = 0.0;
1569   MatScalar      *v;
1570 
1571   PetscFunctionBegin;
1572   if (aij->size == 1) {
1573     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1574   } else {
1575     if (type == NORM_FROBENIUS) {
1576       v = amat->a;
1577       for (i=0; i<amat->nz; i++) {
1578 #if defined(PETSC_USE_COMPLEX)
1579         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1580 #else
1581         sum += (*v)*(*v); v++;
1582 #endif
1583       }
1584       v = bmat->a;
1585       for (i=0; i<bmat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1593       *norm = sqrt(*norm);
1594     } else if (type == NORM_1) { /* max column norm */
1595       PetscReal *tmp,*tmp2;
1596       PetscInt  *jj,*garray = aij->garray;
1597       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1599       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1600       *norm = 0.0;
1601       v = amat->a; jj = amat->j;
1602       for (j=0; j<amat->nz; j++) {
1603         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1604       }
1605       v = bmat->a; jj = bmat->j;
1606       for (j=0; j<bmat->nz; j++) {
1607         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1608       }
1609       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1610       for (j=0; j<mat->cmap->N; j++) {
1611         if (tmp2[j] > *norm) *norm = tmp2[j];
1612       }
1613       ierr = PetscFree(tmp);CHKERRQ(ierr);
1614       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1615     } else if (type == NORM_INFINITY) { /* max row norm */
1616       PetscReal ntemp = 0.0;
1617       for (j=0; j<aij->A->rmap->n; j++) {
1618         v = amat->a + amat->i[j];
1619         sum = 0.0;
1620         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1621           sum += PetscAbsScalar(*v); v++;
1622         }
1623         v = bmat->a + bmat->i[j];
1624         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1625           sum += PetscAbsScalar(*v); v++;
1626         }
1627         if (sum > ntemp) ntemp = sum;
1628       }
1629       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1630     } else {
1631       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1632     }
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatTranspose_MPIAIJ"
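/*
    Forms the transpose by inserting the local rows of A and B as columns of the result
    with MatSetValues(), letting the usual stash/assembly communication move entries to
    their owners. For MAT_INITIAL_MATRIX the diagonal preallocation is computed exactly
    from the column counts of A and reused as an estimate for the off-diagonal part to
    avoid extra communication.
*/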
1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1640 {
1641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1642   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1643   PetscErrorCode ierr;
1644   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1645   PetscInt       cstart=A->cmap->rstart,ncol;
1646   Mat            B;
1647   MatScalar      *array;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1651 
1652   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1653   ai = Aloc->i; aj = Aloc->j;
1654   bi = Bloc->i; bj = Bloc->j;
1655   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1656     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1657     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1658     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1659     for (i=0; i<ai[ma]; i++){
1660       d_nnz[aj[i]] ++;
1661       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1662     }
1663 
1664     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1665     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1666     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1667     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1668     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1669   } else {
1670     B = *matout;
1671   }
1672 
1673   /* copy over the A part */
1674   array = Aloc->a;
1675   row = A->rmap->rstart;
1676   for (i=0; i<ma; i++) {
1677     ncol = ai[i+1]-ai[i];
1678     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1679     row++; array += ncol; aj += ncol;
1680   }
1681   aj = Aloc->j;
1682   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1683 
1684   /* copy over the B part */
1685   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1686   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1687   array = Bloc->a;
1688   row = A->rmap->rstart;
1689   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1690   cols_tmp = cols;
1691   for (i=0; i<mb; i++) {
1692     ncol = bi[i+1]-bi[i];
1693     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1694     row++; array += ncol; cols_tmp += ncol;
1695   }
1696   ierr = PetscFree(cols);CHKERRQ(ierr);
1697 
1698   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1701     *matout = B;
1702   } else {
1703     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
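/*
    Computes mat = diag(ll)*mat*diag(rr). The diagonal block is scaled on both sides
    locally; the off-diagonal block is left-scaled with ll right away, while the right
    scaling waits for the scatter of rr into the ghost ordering (aij->lvec), overlapping
    that communication with the local scaling.
*/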
1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1711 {
1712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1713   Mat            a = aij->A,b = aij->B;
1714   PetscErrorCode ierr;
1715   PetscInt       s1,s2,s3;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1719   if (rr) {
1720     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1721     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1722     /* Overlap communication with computation. */
1723     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724   }
1725   if (ll) {
1726     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1727     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1728     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1729   }
1730   /* scale  the diagonal block */
1731   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1732 
1733   if (rr) {
1734     /* Do a scatter end and then right scale the off-diagonal block */
1735     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1736     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1737   }
1738 
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1745 {
1746   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1751   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1752   PetscFunctionReturn(0);
1753 }
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1756 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1757 {
1758   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1759   PetscErrorCode ierr;
1760 
1761   PetscFunctionBegin;
1762   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatEqual_MPIAIJ"
1768 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1769 {
1770   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1771   Mat            a,b,c,d;
1772   PetscTruth     flg;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   a = matA->A; b = matA->B;
1777   c = matB->A; d = matB->B;
1778 
1779   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1780   if (flg) {
1781     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1782   }
1783   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatCopy_MPIAIJ"
1789 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1790 {
1791   PetscErrorCode ierr;
1792   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1793   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1794 
1795   PetscFunctionBegin;
1796   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1797   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1798     /* because of the column compression in the off-processor part of the matrix a->B,
1799        the number of columns in a->B and b->B may be different, hence we cannot call
1800        the MatCopy() directly on the two parts. If need be, we can provide a more
1801        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1802        then copying the submatrices */
1803     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1804   } else {
1805     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1806     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1807   }
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1813 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1814 {
1815   PetscErrorCode ierr;
1816 
1817   PetscFunctionBegin;
1818   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #include "petscblaslapack.h"
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatAXPY_MPIAIJ"
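/*
    Y = a*X + Y. For SAME_NONZERO_PATTERN the value arrays of the A and B blocks are
    combined directly with BLAS axpy; for SUBSET_NONZERO_PATTERN a map (xtoy) from X's
    nonzeros to Y's nonzeros is built once, cached on Y's off-diagonal block, and
    reused; any other structure falls back to MatAXPY_Basic().
*/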
1825 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1826 {
1827   PetscErrorCode ierr;
1828   PetscInt       i;
1829   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1830   PetscBLASInt   bnz,one=1;
1831   Mat_SeqAIJ     *x,*y;
1832 
1833   PetscFunctionBegin;
1834   if (str == SAME_NONZERO_PATTERN) {
1835     PetscScalar alpha = a;
1836     x = (Mat_SeqAIJ *)xx->A->data;
1837     y = (Mat_SeqAIJ *)yy->A->data;
1838     bnz = PetscBLASIntCast(x->nz);
1839     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1840     x = (Mat_SeqAIJ *)xx->B->data;
1841     y = (Mat_SeqAIJ *)yy->B->data;
1842     bnz = PetscBLASIntCast(x->nz);
1843     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1844   } else if (str == SUBSET_NONZERO_PATTERN) {
1845     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1846 
1847     x = (Mat_SeqAIJ *)xx->B->data;
1848     y = (Mat_SeqAIJ *)yy->B->data;
1849     if (y->xtoy && y->XtoY != xx->B) {
1850       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1851       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1852     }
1853     if (!y->xtoy) { /* get xtoy */
1854       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1855       y->XtoY = xx->B;
1856       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1857     }
1858     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1859   } else {
1860     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1861   }
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatConjugate_MPIAIJ"
1869 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1870 {
1871 #if defined(PETSC_USE_COMPLEX)
1872   PetscErrorCode ierr;
1873   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1874 
1875   PetscFunctionBegin;
1876   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1877   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1878 #else
1879   PetscFunctionBegin;
1880 #endif
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatRealPart_MPIAIJ"
1886 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1887 {
1888   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1889   PetscErrorCode ierr;
1890 
1891   PetscFunctionBegin;
1892   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1893   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1899 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1900 {
1901   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1906   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #ifdef PETSC_HAVE_PBGL
1911 
1912 #include <boost/parallel/mpi/bsp_process_group.hpp>
1913 #include <boost/graph/distributed/ilu_default_graph.hpp>
1914 #include <boost/graph/distributed/ilu_0_block.hpp>
1915 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1916 #include <boost/graph/distributed/petsc/interface.hpp>
1917 #include <boost/multi_array.hpp>
1918 #include <boost/parallel/distributed_property_map.hpp>
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1922 /*
1923   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1924 */
1925 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1926 {
1927   namespace petsc = boost::distributed::petsc;
1928 
1929   namespace graph_dist = boost::graph::distributed;
1930   using boost::graph::distributed::ilu_default::process_group_type;
1931   using boost::graph::ilu_permuted;
1932 
1933   PetscTruth      row_identity, col_identity;
1934   PetscContainer  c;
1935   PetscInt        m, n, M, N;
1936   PetscErrorCode  ierr;
1937 
1938   PetscFunctionBegin;
1939   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1940   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1941   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1942   if (!row_identity || !col_identity) {
1943     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1944   }
1945 
1946   process_group_type pg;
1947   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1948   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1949   lgraph_type&   level_graph = *lgraph_p;
1950   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1951 
1952   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1953   ilu_permuted(level_graph);
1954 
1955   /* put together the new matrix */
1956   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1957   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1958   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1959   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1960   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1961   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963 
1964   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1965   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1966   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1972 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1973 {
1974   PetscFunctionBegin;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatSolve_MPIAIJ"
1980 /*
1981   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1982 */
1983 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1984 {
1985   namespace graph_dist = boost::graph::distributed;
1986 
1987   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1988   lgraph_type*   lgraph_p;
1989   PetscContainer c;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1994   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1995   ierr = VecCopy(b, x);CHKERRQ(ierr);
1996 
1997   PetscScalar* array_x;
1998   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1999   PetscInt sx;
2000   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2001 
2002   PetscScalar* array_b;
2003   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2004   PetscInt sb;
2005   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2006 
2007   lgraph_type&   level_graph = *lgraph_p;
2008   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2009 
2010   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2011   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2012                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2013 
2014   typedef boost::iterator_property_map<array_ref_type::iterator,
2015                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2016   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2017                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2018 
2019   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2020 
2021   PetscFunctionReturn(0);
2022 }
2023 #endif
2024 
2025 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2026   PetscInt       nzlocal,nsends,nrecvs;
2027   PetscMPIInt    *send_rank;
2028   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2029   PetscScalar    *sbuf_a,**rbuf_a;
2030   PetscErrorCode (*MatDestroy)(Mat);
2031 } Mat_Redundant;
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2035 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2036 {
2037   PetscErrorCode       ierr;
2038   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2039   PetscInt             i;
2040 
2041   PetscFunctionBegin;
2042   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2043   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2044   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2045   for (i=0; i<redund->nrecvs; i++){
2046     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2047     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2048   }
2049   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2050   ierr = PetscFree(redund);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatDestroy_MatRedundant"
2056 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2057 {
2058   PetscErrorCode  ierr;
2059   PetscContainer  container;
2060   Mat_Redundant   *redund=PETSC_NULL;
2061 
2062   PetscFunctionBegin;
2063   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2064   if (container) {
2065     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2066   } else {
2067     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2068   }
2069   A->ops->destroy = redund->MatDestroy;
2070   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2071   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2072   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
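/*
    Builds, on each subcommunicator, a copy of the whole matrix. In outline:
      1) determine which ranks this process sends its local rows to (send_rank) and
         receives rows from (recv_rank);
      2) pack the local rows into CSR-style buffers sbuf_j (row pointers plus global
         column indices) and sbuf_a (values);
      3) exchange buffer sizes, then the index buffers, then the values with
         nonblocking sends/receives;
      4) insert the local and received rows into the redundant matrix C and assemble.
    For MAT_REUSE_MATRIX the buffers and communication pattern cached in the
    Mat_Redundant container are reused and only the values are exchanged.
*/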
2078 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2079 {
2080   PetscMPIInt    rank,size;
2081   MPI_Comm       comm=((PetscObject)mat)->comm;
2082   PetscErrorCode ierr;
2083   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2084   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2085   PetscInt       *rowrange=mat->rmap->range;
2086   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2087   Mat            A=aij->A,B=aij->B,C=*matredundant;
2088   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2089   PetscScalar    *sbuf_a;
2090   PetscInt       nzlocal=a->nz+b->nz;
2091   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2092   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2093   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2094   MatScalar      *aworkA,*aworkB;
2095   PetscScalar    *vals;
2096   PetscMPIInt    tag1,tag2,tag3,imdex;
2097   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2098                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2099   MPI_Status     recv_status,*send_status;
2100   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2101   PetscInt       **rbuf_j=PETSC_NULL;
2102   PetscScalar    **rbuf_a=PETSC_NULL;
2103   Mat_Redundant  *redund=PETSC_NULL;
2104   PetscContainer container;
2105 
2106   PetscFunctionBegin;
2107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2109 
2110   if (reuse == MAT_REUSE_MATRIX) {
2111     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2112     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2113     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2115     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2116     if (container) {
2117       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2118     } else {
2119       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2120     }
2121     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2122 
2123     nsends    = redund->nsends;
2124     nrecvs    = redund->nrecvs;
2125     send_rank = redund->send_rank; recv_rank = send_rank + size;
2126     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2127     sbuf_j    = redund->sbuf_j;
2128     sbuf_a    = redund->sbuf_a;
2129     rbuf_j    = redund->rbuf_j;
2130     rbuf_a    = redund->rbuf_a;
2131   }
2132 
2133   if (reuse == MAT_INITIAL_MATRIX){
2134     PetscMPIInt  subrank,subsize;
2135     PetscInt     nleftover,np_subcomm;
2136     /* get the destination processors' id send_rank, nsends and nrecvs */
2137     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2138     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2139     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2140     recv_rank = send_rank + size;
2141     np_subcomm = size/nsubcomm;
2142     nleftover  = size - nsubcomm*np_subcomm;
2143     nsends = 0; nrecvs = 0;
2144     for (i=0; i<size; i++){ /* i=rank*/
2145       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2146         send_rank[nsends] = i; nsends++;
2147         recv_rank[nrecvs++] = i;
2148       }
2149     }
2150     if (rank >= size - nleftover){/* this proc is a leftover processor */
2151       i = size-nleftover-1;
2152       j = 0;
2153       while (j < nsubcomm - nleftover){
2154         send_rank[nsends++] = i;
2155         i--; j++;
2156       }
2157     }
2158 
2159     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2160       for (i=0; i<nleftover; i++){
2161         recv_rank[nrecvs++] = size-nleftover+i;
2162       }
2163     }
2164 
2165     /* allocate sbuf_j, sbuf_a */
2166     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2167     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2168     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2169   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2170 
2171   /* copy mat's local entries into the buffers */
2172   if (reuse == MAT_INITIAL_MATRIX){
2173     rownz_max = 0;
2174     rptr = sbuf_j;
2175     cols = sbuf_j + rend-rstart + 1;
2176     vals = sbuf_a;
2177     rptr[0] = 0;
2178     for (i=0; i<rend-rstart; i++){
2179       row = i + rstart;
2180       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2181       ncols  = nzA + nzB;
2182       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2183       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2184       /* load the column indices for this row into cols */
2185       lwrite = 0;
2186       for (l=0; l<nzB; l++) {
2187         if ((ctmp = bmap[cworkB[l]]) < cstart){
2188           vals[lwrite]   = aworkB[l];
2189           cols[lwrite++] = ctmp;
2190         }
2191       }
2192       for (l=0; l<nzA; l++){
2193         vals[lwrite]   = aworkA[l];
2194         cols[lwrite++] = cstart + cworkA[l];
2195       }
2196       for (l=0; l<nzB; l++) {
2197         if ((ctmp = bmap[cworkB[l]]) >= cend){
2198           vals[lwrite]   = aworkB[l];
2199           cols[lwrite++] = ctmp;
2200         }
2201       }
2202       vals += ncols;
2203       cols += ncols;
2204       rptr[i+1] = rptr[i] + ncols;
2205       if (rownz_max < ncols) rownz_max = ncols;
2206     }
2207     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2208   } else { /* only copy matrix values into sbuf_a */
2209     rptr = sbuf_j;
2210     vals = sbuf_a;
2211     rptr[0] = 0;
2212     for (i=0; i<rend-rstart; i++){
2213       row = i + rstart;
2214       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2215       ncols  = nzA + nzB;
2216       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2217       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2218       lwrite = 0;
2219       for (l=0; l<nzB; l++) {
2220         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2221       }
2222       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2223       for (l=0; l<nzB; l++) {
2224         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2225       }
2226       vals += ncols;
2227       rptr[i+1] = rptr[i] + ncols;
2228     }
2229   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2230 
2231   /* send nzlocal to others, and recv other's nzlocal */
2232   /*--------------------------------------------------*/
2233   if (reuse == MAT_INITIAL_MATRIX){
2234     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2235     s_waits2 = s_waits3 + nsends;
2236     s_waits1 = s_waits2 + nsends;
2237     r_waits1 = s_waits1 + nsends;
2238     r_waits2 = r_waits1 + nrecvs;
2239     r_waits3 = r_waits2 + nrecvs;
2240   } else {
2241     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2242     r_waits3 = s_waits3 + nsends;
2243   }
2244 
2245   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2246   if (reuse == MAT_INITIAL_MATRIX){
2247     /* get new tags to keep the communication clean */
2248     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2249     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2250     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2251     rbuf_nz = sbuf_nz + nsends;
2252 
2253     /* post receives of other's nzlocal */
2254     for (i=0; i<nrecvs; i++){
2255       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2256     }
2257     /* send nzlocal to others */
2258     for (i=0; i<nsends; i++){
2259       sbuf_nz[i] = nzlocal;
2260       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2261     }
2262     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2263     count = nrecvs;
2264     while (count) {
2265       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2266       recv_rank[imdex] = recv_status.MPI_SOURCE;
2267       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2268       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2269 
2270       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2271       rbuf_nz[imdex] += i + 2;
2272       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2273       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2274       count--;
2275     }
2276     /* wait on sends of nzlocal */
2277     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2278     /* send mat->i,j to others, and recv from other's */
2279     /*------------------------------------------------*/
2280     for (i=0; i<nsends; i++){
2281       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2282       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2283     }
2284     /* wait on receives of mat->i,j */
2285     /*------------------------------*/
2286     count = nrecvs;
2287     while (count) {
2288       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2289       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2290       count--;
2291     }
2292     /* wait on sends of mat->i,j */
2293     /*---------------------------*/
2294     if (nsends) {
2295       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2296     }
2297   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2298 
2299   /* post receives, send and receive mat->a */
2300   /*----------------------------------------*/
2301   for (imdex=0; imdex<nrecvs; imdex++) {
2302     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2303   }
2304   for (i=0; i<nsends; i++){
2305     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2306   }
2307   count = nrecvs;
2308   while (count) {
2309     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2310     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2311     count--;
2312   }
2313   if (nsends) {
2314     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2315   }
2316 
2317   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2318 
2319   /* create redundant matrix */
2320   /*-------------------------*/
2321   if (reuse == MAT_INITIAL_MATRIX){
2322     /* compute rownz_max for preallocation */
2323     for (imdex=0; imdex<nrecvs; imdex++){
2324       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2325       rptr = rbuf_j[imdex];
2326       for (i=0; i<j; i++){
2327         ncols = rptr[i+1] - rptr[i];
2328         if (rownz_max < ncols) rownz_max = ncols;
2329       }
2330     }
2331 
2332     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2333     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2334     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2335     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2336     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2337   } else {
2338     C = *matredundant;
2339   }
2340 
2341   /* insert local matrix entries */
2342   rptr = sbuf_j;
2343   cols = sbuf_j + rend-rstart + 1;
2344   vals = sbuf_a;
2345   for (i=0; i<rend-rstart; i++){
2346     row   = i + rstart;
2347     ncols = rptr[i+1] - rptr[i];
2348     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2349     vals += ncols;
2350     cols += ncols;
2351   }
2352   /* insert received matrix entries */
2353   for (imdex=0; imdex<nrecvs; imdex++){
2354     rstart = rowrange[recv_rank[imdex]];
2355     rend   = rowrange[recv_rank[imdex]+1];
2356     rptr = rbuf_j[imdex];
2357     cols = rbuf_j[imdex] + rend-rstart + 1;
2358     vals = rbuf_a[imdex];
2359     for (i=0; i<rend-rstart; i++){
2360       row   = i + rstart;
2361       ncols = rptr[i+1] - rptr[i];
2362       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2363       vals += ncols;
2364       cols += ncols;
2365     }
2366   }
2367   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2368   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2370   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2371   if (reuse == MAT_INITIAL_MATRIX){
2372     PetscContainer container;
2373     *matredundant = C;
2374     /* create a supporting struct and attach it to C for reuse */
2375     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2376     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2377     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2378     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2380 
2381     redund->nzlocal = nzlocal;
2382     redund->nsends  = nsends;
2383     redund->nrecvs  = nrecvs;
2384     redund->send_rank = send_rank;
2385     redund->sbuf_nz = sbuf_nz;
2386     redund->sbuf_j  = sbuf_j;
2387     redund->sbuf_a  = sbuf_a;
2388     redund->rbuf_j  = rbuf_j;
2389     redund->rbuf_a  = rbuf_a;
2390 
2391     redund->MatDestroy = C->ops->destroy;
2392     C->ops->destroy    = MatDestroy_MatRedundant;
2393   }
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
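/*
    v[i] = entry of row i with the largest absolute value (optionally its global column
    in idx[]). The maxima of the diagonal and off-diagonal blocks are computed
    separately and the larger one is kept per row; indices from the diagonal block are
    shifted by cmap->rstart and those from the off-diagonal block are mapped through
    garray.
*/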
2399 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2400 {
2401   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2402   PetscErrorCode ierr;
2403   PetscInt       i,*idxb = 0;
2404   PetscScalar    *va,*vb;
2405   Vec            vtmp;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2409   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2410   if (idx) {
2411     for (i=0; i<A->rmap->n; i++) {
2412       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2413     }
2414   }
2415 
2416   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2417   if (idx) {
2418     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2419   }
2420   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2421   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2422 
2423   for (i=0; i<A->rmap->n; i++){
2424     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2425       va[i] = vb[i];
2426       if (idx) idx[i] = a->garray[idxb[i]];
2427     }
2428   }
2429 
2430   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2431   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2432   if (idxb) {
2433     ierr = PetscFree(idxb);CHKERRQ(ierr);
2434   }
2435   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2441 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2442 {
2443   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2444   PetscErrorCode ierr;
2445   PetscInt       i,*idxb = 0;
2446   PetscScalar    *va,*vb;
2447   Vec            vtmp;
2448 
2449   PetscFunctionBegin;
2450   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2451   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2452   if (idx) {
2453     for (i=0; i<A->rmap->n; i++) {
2454       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2455     }
2456   }
2457 
2458   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2459   if (idx) {
2460     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2461   }
2462   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2463   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2464 
2465   for (i=0; i<A->rmap->n; i++){
2466     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2467       va[i] = vb[i];
2468       if (idx) idx[i] = a->garray[idxb[i]];
2469     }
2470   }
2471 
2472   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2473   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2474   if (idxb) {
2475     ierr = PetscFree(idxb);CHKERRQ(ierr);
2476   }
2477   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2483 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2484 {
2485   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2486   PetscInt       n      = A->rmap->n;
2487   PetscInt       cstart = A->cmap->rstart;
2488   PetscInt      *cmap   = mat->garray;
2489   PetscInt      *diagIdx, *offdiagIdx;
2490   Vec            diagV, offdiagV;
2491   PetscScalar   *a, *diagA, *offdiagA;
2492   PetscInt       r;
2493   PetscErrorCode ierr;
2494 
2495   PetscFunctionBegin;
2496   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2497   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);     /* work vectors are purely local */
2498   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2499   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2500   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2502   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2503   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2504   for(r = 0; r < n; ++r) {
2505     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2506       a[r]   = diagA[r];
2507       idx[r] = cstart + diagIdx[r];
2508     } else {
2509       a[r]   = offdiagA[r];
2510       idx[r] = cmap[offdiagIdx[r]];
2511     }
2512   }
2513   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2514   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2515   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2516   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2517   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2518   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2524 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2525 {
2526   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2527   PetscInt       n      = A->rmap->n;
2528   PetscInt       cstart = A->cmap->rstart;
2529   PetscInt      *cmap   = mat->garray;
2530   PetscInt      *diagIdx, *offdiagIdx;
2531   Vec            diagV, offdiagV;
2532   PetscScalar   *a, *diagA, *offdiagA;
2533   PetscInt       r;
2534   PetscErrorCode ierr;
2535 
2536   PetscFunctionBegin;
2537   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2538   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);     /* work vectors are purely local */
2539   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2540   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2541   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2543   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2544   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2545   for(r = 0; r < n; ++r) {
2546     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2547       a[r]   = diagA[r];
2548       idx[r] = cstart + diagIdx[r];
2549     } else {
2550       a[r]   = offdiagA[r];
2551       idx[r] = cmap[offdiagIdx[r]];
2552     }
2553   }
2554   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2555   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2556   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2557   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2558   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2559   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 #undef __FUNCT__
2564 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2565 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2566 {
2567   PetscErrorCode ierr;
2568   Mat            *dummy;
2569 
2570   PetscFunctionBegin;
2571   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2572   *newmat = *dummy;
2573   ierr = PetscFree(dummy);CHKERRQ(ierr);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2578 /* -------------------------------------------------------------------*/
2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2580        MatGetRow_MPIAIJ,
2581        MatRestoreRow_MPIAIJ,
2582        MatMult_MPIAIJ,
2583 /* 4*/ MatMultAdd_MPIAIJ,
2584        MatMultTranspose_MPIAIJ,
2585        MatMultTransposeAdd_MPIAIJ,
2586 #ifdef PETSC_HAVE_PBGL
2587        MatSolve_MPIAIJ,
2588 #else
2589        0,
2590 #endif
2591        0,
2592        0,
2593 /*10*/ 0,
2594        0,
2595        0,
2596        MatSOR_MPIAIJ,
2597        MatTranspose_MPIAIJ,
2598 /*15*/ MatGetInfo_MPIAIJ,
2599        MatEqual_MPIAIJ,
2600        MatGetDiagonal_MPIAIJ,
2601        MatDiagonalScale_MPIAIJ,
2602        MatNorm_MPIAIJ,
2603 /*20*/ MatAssemblyBegin_MPIAIJ,
2604        MatAssemblyEnd_MPIAIJ,
2605        MatSetOption_MPIAIJ,
2606        MatZeroEntries_MPIAIJ,
2607 /*24*/ MatZeroRows_MPIAIJ,
2608        0,
2609 #ifdef PETSC_HAVE_PBGL
2610        0,
2611 #else
2612        0,
2613 #endif
2614        0,
2615        0,
2616 /*29*/ MatSetUpPreallocation_MPIAIJ,
2617 #ifdef PETSC_HAVE_PBGL
2618        0,
2619 #else
2620        0,
2621 #endif
2622        0,
2623        0,
2624        0,
2625 /*34*/ MatDuplicate_MPIAIJ,
2626        0,
2627        0,
2628        0,
2629        0,
2630 /*39*/ MatAXPY_MPIAIJ,
2631        MatGetSubMatrices_MPIAIJ,
2632        MatIncreaseOverlap_MPIAIJ,
2633        MatGetValues_MPIAIJ,
2634        MatCopy_MPIAIJ,
2635 /*44*/ MatGetRowMax_MPIAIJ,
2636        MatScale_MPIAIJ,
2637        0,
2638        0,
2639        0,
2640 /*49*/ MatSetBlockSize_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644        0,
2645 /*54*/ MatFDColoringCreate_MPIAIJ,
2646        0,
2647        MatSetUnfactored_MPIAIJ,
2648        MatPermute_MPIAIJ,
2649        0,
2650 /*59*/ MatGetSubMatrix_MPIAIJ,
2651        MatDestroy_MPIAIJ,
2652        MatView_MPIAIJ,
2653        0,
2654        0,
2655 /*64*/ 0,
2656        0,
2657        0,
2658        0,
2659        0,
2660 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2661        MatGetRowMinAbs_MPIAIJ,
2662        0,
2663        MatSetColoring_MPIAIJ,
2664 #if defined(PETSC_HAVE_ADIC)
2665        MatSetValuesAdic_MPIAIJ,
2666 #else
2667        0,
2668 #endif
2669        MatSetValuesAdifor_MPIAIJ,
2670 /*75*/ MatFDColoringApply_AIJ,
2671        0,
2672        0,
2673        0,
2674        0,
2675 /*80*/ 0,
2676        0,
2677        0,
2678 /*83*/ MatLoad_MPIAIJ,
2679        0,
2680        0,
2681        0,
2682        0,
2683        0,
2684 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2685        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2686        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2687        MatPtAP_Basic,
2688        MatPtAPSymbolic_MPIAIJ,
2689 /*94*/ MatPtAPNumeric_MPIAIJ,
2690        0,
2691        0,
2692        0,
2693        0,
2694 /*99*/ 0,
2695        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2696        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2697        MatConjugate_MPIAIJ,
2698        0,
2699 /*104*/MatSetValuesRow_MPIAIJ,
2700        MatRealPart_MPIAIJ,
2701        MatImaginaryPart_MPIAIJ,
2702        0,
2703        0,
2704 /*109*/0,
2705        MatGetRedundantMatrix_MPIAIJ,
2706        MatGetRowMin_MPIAIJ,
2707        0,
2708        0,
2709 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2710        0,
2711        0,
2712        0,
2713        0,
2714        0
2715 };
2716 
2717 /* ----------------------------------------------------------------------------------------*/
2718 
2719 EXTERN_C_BEGIN
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2722 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2723 {
2724   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2729   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2730   PetscFunctionReturn(0);
2731 }
2732 EXTERN_C_END
2733 
2734 EXTERN_C_BEGIN
2735 #undef __FUNCT__
2736 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2737 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2738 {
2739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2744   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 EXTERN_C_END
2748 
2749 #include "petscpc.h"
2750 EXTERN_C_BEGIN
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2753 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2754 {
2755   Mat_MPIAIJ     *b;
2756   PetscErrorCode ierr;
2757   PetscInt       i;
2758 
2759   PetscFunctionBegin;
2760   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2761   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2762   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2763   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2764 
2765   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2768   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2769   if (d_nnz) {
2770     for (i=0; i<B->rmap->n; i++) {
2771       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2772     }
2773   }
2774   if (o_nnz) {
2775     for (i=0; i<B->rmap->n; i++) {
2776       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2777     }
2778   }
2779   b = (Mat_MPIAIJ*)B->data;
2780 
2781   if (!B->preallocated) {
2782     /* Explicitly create 2 MATSEQAIJ matrices. */
2783     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2784     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2785     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2786     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2789     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2791   }
2792 
2793   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2794   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2795   B->preallocated = PETSC_TRUE;
2796   PetscFunctionReturn(0);
2797 }
2798 EXTERN_C_END
2799 
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2802 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2803 {
2804   Mat            mat;
2805   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2806   PetscErrorCode ierr;
2807 
2808   PetscFunctionBegin;
2809   *newmat       = 0;
2810   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2811   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2812   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2813   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2814   a    = (Mat_MPIAIJ*)mat->data;
2815 
2816   mat->factor       = matin->factor;
2817   mat->rmap->bs      = matin->rmap->bs;
2818   mat->assembled    = PETSC_TRUE;
2819   mat->insertmode   = NOT_SET_VALUES;
2820   mat->preallocated = PETSC_TRUE;
2821 
2822   a->size           = oldmat->size;
2823   a->rank           = oldmat->rank;
2824   a->donotstash     = oldmat->donotstash;
2825   a->roworiented    = oldmat->roworiented;
2826   a->rowindices     = 0;
2827   a->rowvalues      = 0;
2828   a->getrowactive   = PETSC_FALSE;
2829 
2830   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2831   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2832 
2833   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2834   if (oldmat->colmap) {
2835 #if defined (PETSC_USE_CTABLE)
2836     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2837 #else
2838     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2839     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2840     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2841 #endif
2842   } else a->colmap = 0;
2843   if (oldmat->garray) {
2844     PetscInt len;
2845     len  = oldmat->B->cmap->n;
2846     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2847     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2848     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2849   } else a->garray = 0;
2850 
2851   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2852   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2853   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2854   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2855   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2856   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2857   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2858   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2859   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2860   *newmat = mat;
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 #include "petscsys.h"
2865 
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatLoad_MPIAIJ"
2868 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2869 {
2870   Mat            A;
2871   PetscScalar    *vals,*svals;
2872   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2873   MPI_Status     status;
2874   PetscErrorCode ierr;
2875   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2876   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2877   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2878   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2879   PetscInt       cend,cstart,n,*rowners;
2880   int            fd;
2881 
2882   PetscFunctionBegin;
2883   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2884   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2885   if (!rank) {
2886     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2887     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2888     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2889   }
2890 
2891   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2892   M = header[1]; N = header[2];
2893   /* determine ownership of all rows */
2894   m    = M/size + ((M % size) > rank);
2895   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2896   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2897 
2898   /* First process needs enough room for process with most rows */
2899   if (!rank) {
2900     mmax       = rowners[1];
2901     for (i=2; i<size; i++) {
2902       mmax = PetscMax(mmax,rowners[i]);
2903     }
2904   } else mmax = m;
2905 
2906   rowners[0] = 0;
2907   for (i=2; i<=size; i++) {
2908     rowners[i] += rowners[i-1];
2909   }
2910   rstart = rowners[rank];
2911   rend   = rowners[rank+1];
2912 
2913   /* distribute row lengths to all processors */
2914   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2915   if (!rank) {
2916     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2917     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2918     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2919     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2920     for (j=0; j<m; j++) {
2921       procsnz[0] += ourlens[j];
2922     }
2923     for (i=1; i<size; i++) {
2924       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2925       /* calculate the number of nonzeros on each processor */
2926       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2927         procsnz[i] += rowlengths[j];
2928       }
2929       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2930       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2931     }
2932     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2933   } else {
2934     mpicnt = PetscMPIIntCast(m);
2935     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2936   }
2937 
2938   if (!rank) {
2939     /* determine max buffer needed and allocate it */
2940     maxnz = 0;
2941     for (i=0; i<size; i++) {
2942       maxnz = PetscMax(maxnz,procsnz[i]);
2943     }
2944     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2945 
2946     /* read in my part of the matrix column indices  */
2947     nz   = procsnz[0];
2948     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2949     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2950 
2951     /* read in everyone else's and ship them off */
2952     for (i=1; i<size; i++) {
2953       nz     = procsnz[i];
2954       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2955       mpicnt = PetscMPIIntCast(nz);
2956       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2957     }
2958     ierr = PetscFree(cols);CHKERRQ(ierr);
2959   } else {
2960     /* determine buffer space needed for message */
2961     nz = 0;
2962     for (i=0; i<m; i++) {
2963       nz += ourlens[i];
2964     }
2965     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2966 
2967     /* receive message of column indices*/
2968     mpicnt = PetscMPIIntCast(nz);
2969     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2970     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2971     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2972     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2973     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2974   }
2975 
2976   /* determine column ownership if matrix is not square */
2977   if (N != M) {
2978     n      = N/size + ((N % size) > rank);
2979     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2980     cstart = cend - n;
2981   } else {
2982     cstart = rstart;
2983     cend   = rend;
2984     n      = cend - cstart;
2985   }
2986 
2987   /* loop over local rows, determining number of off diagonal entries */
2988   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2989   jj = 0;
2990   for (i=0; i<m; i++) {
2991     for (j=0; j<ourlens[i]; j++) {
2992       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2993       jj++;
2994     }
2995   }
2996 
2997   /* create our matrix */
2998   for (i=0; i<m; i++) {
2999     ourlens[i] -= offlens[i];
3000   }
3001   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3002   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3003   ierr = MatSetType(A,type);CHKERRQ(ierr);
3004   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3005 
3006   for (i=0; i<m; i++) {
3007     ourlens[i] += offlens[i];
3008   }
3009 
3010   if (!rank) {
3011     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3012 
3013     /* read in my part of the matrix numerical values  */
3014     nz   = procsnz[0];
3015     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3016 
3017     /* insert into matrix */
3018     jj      = rstart;
3019     smycols = mycols;
3020     svals   = vals;
3021     for (i=0; i<m; i++) {
3022       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3023       smycols += ourlens[i];
3024       svals   += ourlens[i];
3025       jj++;
3026     }
3027 
3028     /* read in other processors and ship out */
3029     for (i=1; i<size; i++) {
3030       nz     = procsnz[i];
3031       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3032       mpicnt = PetscMPIIntCast(nz);
3033       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3034     }
3035     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3036   } else {
3037     /* receive numeric values */
3038     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3039 
3040     /* receive message of values*/
3041     mpicnt = PetscMPIIntCast(nz);
3042     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3043     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3044     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3045     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3046     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3047 
3048     /* insert into matrix */
3049     jj      = rstart;
3050     smycols = mycols;
3051     svals   = vals;
3052     for (i=0; i<m; i++) {
3053       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3054       smycols += ourlens[i];
3055       svals   += ourlens[i];
3056       jj++;
3057     }
3058   }
3059   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3060   ierr = PetscFree(vals);CHKERRQ(ierr);
3061   ierr = PetscFree(mycols);CHKERRQ(ierr);
3062   ierr = PetscFree(rowners);CHKERRQ(ierr);
3063 
3064   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3065   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   *newmat = A;
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3072 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3073 {
3074   PetscErrorCode ierr;
3075   IS             iscol_local;
3076   PetscInt       csize;
3077 
3078   PetscFunctionBegin;
3079   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3080   if (call == MAT_REUSE_MATRIX) {
3081     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3082     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3083   } else {
3084     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3085   }
3086   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3087   if (call == MAT_INITIAL_MATRIX) {
3088     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3089     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3090   }
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 #undef __FUNCT__
3095 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3096 /*
3097     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3098   copy, and then the end result is formed by concatenating the local matrices.
3099   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3100 
3101   Note: This requires a sequential iscol with all indices.
3102 */
3103 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3104 {
3105   PetscErrorCode ierr;
3106   PetscMPIInt    rank,size;
3107   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3108   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3109   Mat            *local,M,Mreuse;
3110   MatScalar      *vwork,*aa;
3111   MPI_Comm       comm = ((PetscObject)mat)->comm;
3112   Mat_SeqAIJ     *aij;
3113 
3114 
3115   PetscFunctionBegin;
3116   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3117   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3118 
3119   if (call ==  MAT_REUSE_MATRIX) {
3120     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3121     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3122     local = &Mreuse;
3123     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3124   } else {
3125     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3126     Mreuse = *local;
3127     ierr   = PetscFree(local);CHKERRQ(ierr);
3128   }
3129 
3130   /*
3131       m - number of local rows
3132       n - number of columns (same on all processors)
3133       rstart - first row in new global matrix generated
3134   */
3135   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3136   if (call == MAT_INITIAL_MATRIX) {
3137     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3138     ii  = aij->i;
3139     jj  = aij->j;
3140 
3141     /*
3142         Determine the number of non-zeros in the diagonal and off-diagonal
3143         portions of the matrix in order to do correct preallocation
3144     */
3145 
3146     /* first get start and end of "diagonal" columns */
3147     if (csize == PETSC_DECIDE) {
3148       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3149       if (mglobal == n) { /* square matrix */
3150 	nlocal = m;
3151       } else {
3152         nlocal = n/size + ((n % size) > rank);
3153       }
3154     } else {
3155       nlocal = csize;
3156     }
3157     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3158     rstart = rend - nlocal;
3159     if (rank == size - 1 && rend != n) {
3160       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3161     }
3162 
3163     /* next, compute all the lengths */
3164     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3165     olens = dlens + m;
3166     for (i=0; i<m; i++) {
3167       jend = ii[i+1] - ii[i];
3168       olen = 0;
3169       dlen = 0;
3170       for (j=0; j<jend; j++) {
3171         if (*jj < rstart || *jj >= rend) olen++;
3172         else dlen++;
3173         jj++;
3174       }
3175       olens[i] = olen;
3176       dlens[i] = dlen;
3177     }
3178     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3179     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3180     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3181     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3182     ierr = PetscFree(dlens);CHKERRQ(ierr);
3183   } else {
3184     PetscInt ml,nl;
3185 
3186     M = *newmat;
3187     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3188     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3189     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3190     /*
3191          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3192        rather than the slower MatSetValues().
3193     */
3194     M->was_assembled = PETSC_TRUE;
3195     M->assembled     = PETSC_FALSE;
3196   }
3197   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3198   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3199   ii  = aij->i;
3200   jj  = aij->j;
3201   aa  = aij->a;
3202   for (i=0; i<m; i++) {
3203     row   = rstart + i;
3204     nz    = ii[i+1] - ii[i];
3205     cwork = jj;     jj += nz;
3206     vwork = aa;     aa += nz;
3207     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3208   }
3209 
3210   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3211   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3212   *newmat = M;
3213 
3214   /* save submatrix used in processor for next request */
3215   if (call ==  MAT_INITIAL_MATRIX) {
3216     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3217     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3218   }
3219 
3220   PetscFunctionReturn(0);
3221 }
3222 
3223 EXTERN_C_BEGIN
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3226 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3227 {
3228   PetscInt       m,cstart, cend,j,nnz,i,d;
3229   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3230   const PetscInt *JJ;
3231   PetscScalar    *values;
3232   PetscErrorCode ierr;
3233 
3234   PetscFunctionBegin;
3235   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3236 
3237   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3238   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3239   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3241   m      = B->rmap->n;
3242   cstart = B->cmap->rstart;
3243   cend   = B->cmap->rend;
3244   rstart = B->rmap->rstart;
3245 
3246   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3247   o_nnz = d_nnz + m;
3248 
3249 #if defined(PETSC_USE_DEBUG)
3250   for (i=0; i<m; i++) {
3251     nnz     = Ii[i+1]- Ii[i];
3252     JJ      = J + Ii[i];
3253     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3254     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3255     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3256     for (j=1; j<nnz; j++) {
3257       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3258     }
3259   }
3260 #endif
3261 
3262   for (i=0; i<m; i++) {
3263     nnz     = Ii[i+1]- Ii[i];
3264     JJ      = J + Ii[i];
3265     nnz_max = PetscMax(nnz_max,nnz);
3266     for (j=0; j<nnz; j++) {
3267       if (*JJ >= cstart) break;
3268       JJ++;
3269     }
3270     d = 0;
3271     for (; j<nnz; j++) {
3272       if (*JJ++ >= cend) break;
3273       d++;
3274     }
3275     d_nnz[i] = d;
3276     o_nnz[i] = nnz - d;
3277   }
3278   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3279   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3280 
3281   if (v) values = (PetscScalar*)v;
3282   else {
3283     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3284     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3285   }
3286 
3287   for (i=0; i<m; i++) {
3288     ii   = i + rstart;
3289     nnz  = Ii[i+1]- Ii[i];
3290     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3291   }
3292   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3293   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3294 
3295   if (!v) {
3296     ierr = PetscFree(values);CHKERRQ(ierr);
3297   }
3298   PetscFunctionReturn(0);
3299 }
3300 EXTERN_C_END
3301 
3302 #undef __FUNCT__
3303 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3304 /*@
3305    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3306    (the default parallel PETSc format).
3307 
3308    Collective on MPI_Comm
3309 
3310    Input Parameters:
3311 +  B - the matrix
3312 .  i - the indices into j for the start of each local row (starts with zero)
3313 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3314 -  v - optional values in the matrix
3315 
3316    Level: developer
3317 
3318    Notes:
3319        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3320      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3321      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3322 
3323        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3324 
3325        The format used for the sparse matrix input is equivalent to a
3326     row-major ordering, i.e. for the following matrix, the input data expected is
3327     as shown:
3328 
3329         1 0 0
3330         2 0 3     P0
3331        -------
3332         4 5 6     P1
3333 
3334      Process0 [P0]: rows_owned=[0,1]
3335         i =  {0,1,3}  [size = nrow+1  = 2+1]
3336         j =  {0,0,2}  [size = nz = 3]
3337         v =  {1,2,3}  [size = nz = 3]
3338 
3339      Process1 [P1]: rows_owned=[2]
3340         i =  {0,3}    [size = nrow+1  = 1+1]
3341         j =  {0,1,2}  [size = nz = 3]
3342         v =  {4,5,6}  [size = nz = 3]
3343 
3344       The column indices for each row MUST be sorted.
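
      A minimal sketch of the corresponding call on P0 of the two-process example above;
      names are illustrative only (comm is the matrix communicator) and error checking
      with CHKERRQ() is omitted:

.vb
      Mat         B;
      PetscInt    i[] = {0,1,3};        /* row offsets for the 2 local rows  */
      PetscInt    j[] = {0,0,2};        /* sorted 0-based column indices     */
      PetscScalar v[] = {1.0,2.0,3.0};  /* values matching the entries of j  */

      MatCreate(comm,&B);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve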
3345 
3346 .keywords: matrix, aij, compressed row, sparse, parallel
3347 
3348 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3349           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3350 @*/
3351 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3352 {
3353   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3354 
3355   PetscFunctionBegin;
3356   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3357   if (f) {
3358     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3359   }
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 #undef __FUNCT__
3364 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3365 /*@C
3366    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3367    (the default parallel PETSc format).  For good matrix assembly performance
3368    the user should preallocate the matrix storage by setting the parameters
3369    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3370    performance can be increased by more than a factor of 50.
3371 
3372    Collective on MPI_Comm
3373 
3374    Input Parameters:
3375 +  A - the matrix
3376 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3377            (same value is used for all local rows)
3378 .  d_nnz - array containing the number of nonzeros in the various rows of the
3379            DIAGONAL portion of the local submatrix (possibly different for each row)
3380            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3381            The size of this array is equal to the number of local rows, i.e 'm'.
3382            You must leave room for the diagonal entry even if it is zero.
3383 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3384            submatrix (same value is used for all local rows).
3385 -  o_nnz - array containing the number of nonzeros in the various rows of the
3386            OFF-DIAGONAL portion of the local submatrix (possibly different for
3387            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3388            structure. The size of this array is equal to the number
3389            of local rows, i.e 'm'.
3390 
3391    If the *_nnz parameter is given then the *_nz parameter is ignored
3392 
3393    The AIJ format (also called the Yale sparse matrix format or
3394    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3395    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3396 
3397    The parallel matrix is partitioned such that the first m0 rows belong to
3398    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3399    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3400 
3401    The DIAGONAL portion of the local submatrix of a processor can be defined
3402    as the submatrix which is obtained by extracting the part corresponding
3403    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3404    first row that belongs to the processor, and r2 is the last row belonging
3405    to this processor. This is a square mxm matrix. The remaining portion
3406    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3407 
3408    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3409 
3410    You can call MatGetInfo() to get information on how effective the preallocation was;
3411    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3412    You can also run with the option -info and look for messages with the string
3413    malloc in them to see if additional memory allocation was needed.
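
   For instance, a minimal sketch of checking the preallocation afterwards
   (assuming A has been assembled; error checking omitted):

.vb
      MatInfo info;
      MatGetInfo(A,MAT_LOCAL,&info);
      PetscPrintf(PETSC_COMM_SELF,"mallocs %g allocated %g used %g unneeded %g\n",
                  info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve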
3414 
3415    Example usage:
3416 
3417    Consider the following 8x8 matrix with 34 non-zero values, that is
3418    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3419    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3420    as follows:
3421 
3422 .vb
3423             1  2  0  |  0  3  0  |  0  4
3424     Proc0   0  5  6  |  7  0  0  |  8  0
3425             9  0 10  | 11  0  0  | 12  0
3426     -------------------------------------
3427            13  0 14  | 15 16 17  |  0  0
3428     Proc1   0 18  0  | 19 20 21  |  0  0
3429             0  0  0  | 22 23  0  | 24  0
3430     -------------------------------------
3431     Proc2  25 26 27  |  0  0 28  | 29  0
3432            30  0  0  | 31 32 33  |  0 34
3433 .ve
3434 
3435    This can be represented as a collection of submatrices as:
3436 
3437 .vb
3438       A B C
3439       D E F
3440       G H I
3441 .ve
3442 
3443    Where the submatrices A,B,C are owned by proc0, D,E,F are
3444    owned by proc1, G,H,I are owned by proc2.
3445 
3446    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3447    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3448    The 'M','N' parameters are 8,8, and have the same values on all procs.
3449 
3450    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3451    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3452    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3453    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3454    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
3455    matrix, and [DF] as another SeqAIJ matrix.
3456 
3457    When d_nz, o_nz parameters are specified, d_nz storage elements are
3458    allocated for every row of the local diagonal submatrix, and o_nz
3459    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3460    One way to choose d_nz and o_nz is to use the max nonzeros per local
3461    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3462    In this case, the values of d_nz,o_nz are:
3463 .vb
3464      proc0 : dnz = 2, o_nz = 2
3465      proc1 : dnz = 3, o_nz = 2
3466      proc2 : dnz = 1, o_nz = 4
3467 .ve
3468    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3469    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3470    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3471    34 values.
3472 
3473    When d_nnz, o_nnz parameters are specified, the storage is specified
3474    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3475    In the above case the values for d_nnz,o_nnz are:
3476 .vb
3477      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3478      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3479      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3480 .ve
3481    Here the space allocated is the sum of all the above values, i.e. 34, and
3482    hence pre-allocation is perfect.
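
   A minimal sketch of the corresponding call on proc0 for this example
   (error checking omitted):

.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve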
3483 
3484    Level: intermediate
3485 
3486 .keywords: matrix, aij, compressed row, sparse, parallel
3487 
3488 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3489           MPIAIJ, MatGetInfo()
3490 @*/
3491 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3492 {
3493   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3494 
3495   PetscFunctionBegin;
3496   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3497   if (f) {
3498     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3499   }
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3505 /*@
3506      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3507          CSR format the local rows.
3508 
3509    Collective on MPI_Comm
3510 
3511    Input Parameters:
3512 +  comm - MPI communicator
3513 .  m - number of local rows (Cannot be PETSC_DECIDE)
3514 .  n - This value should be the same as the local size used in creating the
3515        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3516        calculated if N is given) For square matrices n is almost always m.
3517 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3518 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3519 .   i - row indices
3520 .   j - column indices
3521 -   a - matrix values
3522 
3523    Output Parameter:
3524 .   mat - the matrix
3525 
3526    Level: intermediate
3527 
3528    Notes:
3529        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3530      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3531      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3532 
3533        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3534 
3535        The format used for the sparse matrix input is equivalent to a
3536     row-major ordering, i.e. for the following matrix, the input data expected is
3537     as shown:
3538 
3539         1 0 0
3540         2 0 3     P0
3541        -------
3542         4 5 6     P1
3543 
3544      Process0 [P0]: rows_owned=[0,1]
3545         i =  {0,1,3}  [size = nrow+1  = 2+1]
3546         j =  {0,0,2}  [size = nz = 3]
3547         v =  {1,2,3}  [size = nz = 3]
3548 
3549      Process1 [P1]: rows_owned=[2]
3550         i =  {0,3}    [size = nrow+1  = 1+1]
3551         j =  {0,1,2}  [size = nz = 3]
3552         v =  {4,5,6}  [size = nz = 3]
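
     A minimal sketch of the call on P0 for this two-process example (P1 would pass
     its own arrays with m = 1); comm, i, j, and v are assumed to be set up as above,
     and error checking is omitted:

.vb
      Mat A;
      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve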
3553 
3554 .keywords: matrix, aij, compressed row, sparse, parallel
3555 
3556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3557           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3558 @*/
3559 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3560 {
3561   PetscErrorCode ierr;
3562 
3563   PetscFunctionBegin;
3564   if (i[0]) {
3565     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3566   }
3567   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3568   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3569   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3570   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3571   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3572   PetscFunctionReturn(0);
3573 }
3574 
3575 #undef __FUNCT__
3576 #define __FUNCT__ "MatCreateMPIAIJ"
3577 /*@C
3578    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3579    (the default parallel PETSc format).  For good matrix assembly performance
3580    the user should preallocate the matrix storage by setting the parameters
3581    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3582    performance can be increased by more than a factor of 50.
3583 
3584    Collective on MPI_Comm
3585 
3586    Input Parameters:
3587 +  comm - MPI communicator
3588 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3589            This value should be the same as the local size used in creating the
3590            y vector for the matrix-vector product y = Ax.
3591 .  n - This value should be the same as the local size used in creating the
3592        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3593        calculated if N is given) For square matrices n is almost always m.
3594 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3595 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3596 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3597            (same value is used for all local rows)
3598 .  d_nnz - array containing the number of nonzeros in the various rows of the
3599            DIAGONAL portion of the local submatrix (possibly different for each row)
3600            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3601            The size of this array is equal to the number of local rows, i.e 'm'.
3602            You must leave room for the diagonal entry even if it is zero.
3603 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3604            submatrix (same value is used for all local rows).
3605 -  o_nnz - array containing the number of nonzeros in the various rows of the
3606            OFF-DIAGONAL portion of the local submatrix (possibly different for
3607            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3608            structure. The size of this array is equal to the number
3609            of local rows, i.e 'm'.
3610 
3611    Output Parameter:
3612 .  A - the matrix
3613 
3614    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3615    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3616    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
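
   A minimal sketch of that recommended sequence (error checking omitted; calling
   both preallocation routines is safe because the one that does not match the
   actual matrix type is ignored by the dispatch in the wrapper):

.vb
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetFromOptions(A);
      MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
      MatSeqAIJSetPreallocation(A,d_nz,d_nnz);
.ve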
3617 
3618    Notes:
3619    If the *_nnz parameter is given then the *_nz parameter is ignored
3620 
3621    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3622    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3623    storage requirements for this matrix.
3624 
3625    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3626    processor then it must be used on all processors that share the object for
3627    that argument.
3628 
3629    The user MUST specify either the local or global matrix dimensions
3630    (possibly both).
3631 
3632    The parallel matrix is partitioned across processors such that the
3633    first m0 rows belong to process 0, the next m1 rows belong to
3634    process 1, the next m2 rows belong to process 2 etc.. where
3635    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3636    values corresponding to [m x N] submatrix.
3637 
3638    The columns are logically partitioned with the n0 columns belonging
3639    to 0th partition, the next n1 columns belonging to the next
3640    partition etc., where n0,n1,n2... are the input parameter 'n'.
3641 
3642    The DIAGONAL portion of the local submatrix on any given processor
3643    is the submatrix corresponding to the rows and columns m,n
3644    corresponding to the given processor. i.e diagonal matrix on
3645    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3646    etc. The remaining portion of the local submatrix [m x (N-n)]
3647    constitute the OFF-DIAGONAL portion. The example below better
3648    illustrates this concept.
3649 
3650    For a square global matrix we define each processor's diagonal portion
3651    to be its local rows and the corresponding columns (a square submatrix);
3652    each processor's off-diagonal portion encompasses the remainder of the
3653    local matrix (a rectangular submatrix).
3654 
3655    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3656 
3657    When calling this routine with a single process communicator, a matrix of
3658    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3659    type of communicator, use the construction mechanism:
3660      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3661 
3662    By default, this format uses inodes (identical nodes) when possible.
3663    We search for consecutive rows with the same nonzero structure, thereby
3664    reusing matrix information to achieve increased efficiency.
3665 
3666    Options Database Keys:
3667 +  -mat_no_inode  - Do not use inodes
3668 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3669 -  -mat_aij_oneindex - Internally use indexing starting at 1
3670         rather than 0.  Note that when calling MatSetValues(),
3671         the user still MUST index entries starting at 0!
3672 
3673 
3674    Example usage:
3675 
3676    Consider the following 8x8 matrix with 34 non-zero values, that is
3677    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3678    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3679    as follows:
3680 
3681 .vb
3682             1  2  0  |  0  3  0  |  0  4
3683     Proc0   0  5  6  |  7  0  0  |  8  0
3684             9  0 10  | 11  0  0  | 12  0
3685     -------------------------------------
3686            13  0 14  | 15 16 17  |  0  0
3687     Proc1   0 18  0  | 19 20 21  |  0  0
3688             0  0  0  | 22 23  0  | 24  0
3689     -------------------------------------
3690     Proc2  25 26 27  |  0  0 28  | 29  0
3691            30  0  0  | 31 32 33  |  0 34
3692 .ve
3693 
3694    This can be represented as a collection of submatrices as:
3695 
3696 .vb
3697       A B C
3698       D E F
3699       G H I
3700 .ve
3701 
3702    Where the submatrices A,B,C are owned by proc0, D,E,F are
3703    owned by proc1, G,H,I are owned by proc2.
3704 
3705    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3706    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3707    The 'M','N' parameters are 8,8, and have the same values on all procs.
3708 
3709    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3710    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3711    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3712    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3713    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
3714    matrix, and [DF] as another SeqAIJ matrix.
3715 
3716    When d_nz, o_nz parameters are specified, d_nz storage elements are
3717    allocated for every row of the local diagonal submatrix, and o_nz
3718    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3719    One way to choose d_nz and o_nz is to use the max nonzeros per local
3720    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3721    In this case, the values of d_nz,o_nz are:
3722 .vb
3723      proc0 : dnz = 2, o_nz = 2
3724      proc1 : dnz = 3, o_nz = 2
3725      proc2 : dnz = 1, o_nz = 4
3726 .ve
3727    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3728    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3729    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3730    34 values.
3731 
3732    When d_nnz, o_nnz parameters are specified, the storage is specified
3733    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3734    In the above case the values for d_nnz,o_nnz are:
3735 .vb
3736      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3737      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3738      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3739 .ve
3740    Here the space allocated is the sum of all the above values, i.e. 34, and
3741    hence pre-allocation is perfect.
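
   A minimal sketch of the corresponding call on proc0 for this example
   (error checking omitted):

.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve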
3742 
3743    Level: intermediate
3744 
3745 .keywords: matrix, aij, compressed row, sparse, parallel
3746 
3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3748           MPIAIJ, MatCreateMPIAIJWithArrays()
3749 @*/
3750 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3751 {
3752   PetscErrorCode ierr;
3753   PetscMPIInt    size;
3754 
3755   PetscFunctionBegin;
3756   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3757   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3758   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3759   if (size > 1) {
3760     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3761     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3762   } else {
3763     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3764     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3765   }
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3771 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3772 {
3773   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3774 
3775   PetscFunctionBegin;
3776   *Ad     = a->A;
3777   *Ao     = a->B;
3778   *colmap = a->garray;
3779   PetscFunctionReturn(0);
3780 }
3781 
3782 #undef __FUNCT__
3783 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3784 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3785 {
3786   PetscErrorCode ierr;
3787   PetscInt       i;
3788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3789 
3790   PetscFunctionBegin;
3791   if (coloring->ctype == IS_COLORING_GLOBAL) {
3792     ISColoringValue *allcolors,*colors;
3793     ISColoring      ocoloring;
3794 
3795     /* set coloring for diagonal portion */
3796     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3797 
3798     /* set coloring for off-diagonal portion */
3799     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3800     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3801     for (i=0; i<a->B->cmap->n; i++) {
3802       colors[i] = allcolors[a->garray[i]];
3803     }
3804     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3805     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3806     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3807     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3808   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3809     ISColoringValue *colors;
3810     PetscInt        *larray;
3811     ISColoring      ocoloring;
3812 
3813     /* set coloring for diagonal portion */
3814     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3815     for (i=0; i<a->A->cmap->n; i++) {
3816       larray[i] = i + A->cmap->rstart;
3817     }
3818     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3819     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3820     for (i=0; i<a->A->cmap->n; i++) {
3821       colors[i] = coloring->colors[larray[i]];
3822     }
3823     ierr = PetscFree(larray);CHKERRQ(ierr);
3824     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3825     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3826     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3827 
3828     /* set coloring for off-diagonal portion */
3829     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3830     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3831     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3832     for (i=0; i<a->B->cmap->n; i++) {
3833       colors[i] = coloring->colors[larray[i]];
3834     }
3835     ierr = PetscFree(larray);CHKERRQ(ierr);
3836     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3837     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3838     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3839   } else {
3840     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3841   }
3842 
3843   PetscFunctionReturn(0);
3844 }
3845 
3846 #if defined(PETSC_HAVE_ADIC)
3847 #undef __FUNCT__
3848 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3849 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3850 {
3851   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3852   PetscErrorCode ierr;
3853 
3854   PetscFunctionBegin;
3855   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3856   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3857   PetscFunctionReturn(0);
3858 }
3859 #endif
3860 
3861 #undef __FUNCT__
3862 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3863 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3864 {
3865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3866   PetscErrorCode ierr;
3867 
3868   PetscFunctionBegin;
3869   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3870   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3871   PetscFunctionReturn(0);
3872 }
3873 
3874 #undef __FUNCT__
3875 #define __FUNCT__ "MatMerge"
3876 /*@
3877       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3878                  matrices from each processor
3879 
3880     Collective on MPI_Comm
3881 
3882    Input Parameters:
3883 +    comm - the communicator the parallel matrix will live on
3884 .    inmat - the input sequential matrices
3885 .    n - number of local columns (or PETSC_DECIDE)
3886 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3887 
3888    Output Parameter:
3889 .    outmat - the parallel matrix generated
3890 
3891     Level: advanced
3892 
3893    Notes: The number of columns of the matrix in EACH processor MUST be the same.
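
   A minimal usage sketch (names illustrative; inmat must be a SeqAIJ matrix on
   each process and is destroyed by this routine):

.vb
      MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve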
3894 
3895 @*/
3896 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3897 {
3898   PetscErrorCode ierr;
3899   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3900   PetscInt       *indx;
3901   PetscScalar    *values;
3902 
3903   PetscFunctionBegin;
3904   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3905   if (scall == MAT_INITIAL_MATRIX){
3906     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3907     if (n == PETSC_DECIDE){
3908       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3909     }
3910     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3911     rstart -= m;
3912 
3913     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3914     for (i=0;i<m;i++) {
3915       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3916       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3917       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3918     }
3919     /* This routine will ONLY return MPIAIJ type matrix */
3920     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3921     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3922     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3923     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3924     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3925 
3926   } else if (scall == MAT_REUSE_MATRIX){
3927     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3928   } else {
3929     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3930   }
3931 
3932   for (i=0;i<m;i++) {
3933     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3934     Ii    = i + rstart;
3935     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3936     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3937   }
3938   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3939   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3940   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3941 
3942   PetscFunctionReturn(0);
3943 }
3944 
3945 #undef __FUNCT__
3946 #define __FUNCT__ "MatFileSplit"
3947 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3948 {
3949   PetscErrorCode    ierr;
3950   PetscMPIInt       rank;
3951   PetscInt          m,N,i,rstart,nnz;
3952   size_t            len;
3953   const PetscInt    *indx;
3954   PetscViewer       out;
3955   char              *name;
3956   Mat               B;
3957   const PetscScalar *values;
3958 
3959   PetscFunctionBegin;
3960   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3961   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3962   /* Should this be the type of the diagonal block of A? */
3963   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3964   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3965   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3966   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3967   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3968   for (i=0;i<m;i++) {
3969     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3970     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3971     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3972   }
3973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975 
3976   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3977   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3978   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3979   sprintf(name,"%s.%d",outfile,rank);
3980   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3981   ierr = PetscFree(name);CHKERRQ(ierr);
3982   ierr = MatView(B,out);CHKERRQ(ierr);
3983   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3984   ierr = MatDestroy(B);CHKERRQ(ierr);
3985   PetscFunctionReturn(0);
3986 }
3987 
3988 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3989 #undef __FUNCT__
3990 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3991 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3992 {
3993   PetscErrorCode       ierr;
3994   Mat_Merge_SeqsToMPI  *merge = PETSC_NULL; /* remains NULL if no container is attached, so the PetscFree() below is a no-op */
3995   PetscContainer       container;
3996 
3997   PetscFunctionBegin;
3998   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3999   if (container) {
4000     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4007     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4008     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4009     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4010     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4011     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4012 
4013     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4014     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4015   }
4016   ierr = PetscFree(merge);CHKERRQ(ierr);
4017 
4018   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4019   PetscFunctionReturn(0);
4020 }
4021 
4022 #include "../src/mat/utils/freespace.h"
4023 #include "petscbt.h"
4024 
4025 #undef __FUNCT__
4026 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4027 /*@C
4028       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4029                  matrices from each processor
4030 
4031     Collective on MPI_Comm
4032 
4033    Input Parameters:
4034 +    comm - the communicator the parallel matrix will live on
4035 .    seqmat - the input sequential matrices
4036 .    m - number of local rows (or PETSC_DECIDE)
4037 .    n - number of local columns (or PETSC_DECIDE)
4038 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4039 
4040    Output Parameter:
4041 .    mpimat - the parallel matrix generated
4042 
4043     Level: advanced
4044 
4045    Notes:
4046      The dimensions of the sequential matrix on each process MUST be the same.
4047      The input seqmat is kept in the container "Mat_Merge_SeqsToMPI" attached to mpimat and is
4048      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
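
   Example Usage:
     A minimal sketch, assuming each process has already assembled a SeqAIJ matrix Aloc of
     identical dimensions (Aloc and the PETSC_DECIDE sizes are illustrative only):
.vb
     Mat C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
.ve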
4049 @*/
4050 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4051 {
4052   PetscErrorCode       ierr;
4053   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4054   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4055   PetscMPIInt          size,rank,taga,*len_s;
4056   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4057   PetscInt             proc,m;
4058   PetscInt             **buf_ri,**buf_rj;
4059   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4060   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4061   MPI_Request          *s_waits,*r_waits;
4062   MPI_Status           *status;
4063   MatScalar            *aa=a->a;
4064   MatScalar            **abuf_r,*ba_i;
4065   Mat_Merge_SeqsToMPI  *merge;
4066   PetscContainer       container;
4067 
4068   PetscFunctionBegin;
4069   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4070 
4071   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4072   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4073 
4074   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4075   if (container) {
4076     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4077   }
4078   bi     = merge->bi;
4079   bj     = merge->bj;
4080   buf_ri = merge->buf_ri;
4081   buf_rj = merge->buf_rj;
4082 
4083   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4084   owners = merge->rowmap->range;
4085   len_s  = merge->len_s;
4086 
4087   /* send and recv matrix values */
4088   /*-----------------------------*/
4089   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4090   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4091 
4092   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4093   for (proc=0,k=0; proc<size; proc++){
4094     if (!len_s[proc]) continue;
4095     i = owners[proc];
4096     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4097     k++;
4098   }
4099 
4100   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4101   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4102   ierr = PetscFree(status);CHKERRQ(ierr);
4103 
4104   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4105   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4106 
4107   /* insert mat values of mpimat */
4108   /*----------------------------*/
4109   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4110   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4111   nextrow = buf_ri_k + merge->nrecv;
4112   nextai  = nextrow + merge->nrecv;
4113 
4114   for (k=0; k<merge->nrecv; k++){
4115     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4116     nrows = *(buf_ri_k[k]);
4117     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4118     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4119   }
4120 
4121   /* set values of ba */
4122   m = merge->rowmap->n;
4123   for (i=0; i<m; i++) {
4124     arow = owners[rank] + i;
4125     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4126     bnzi = bi[i+1] - bi[i];
4127     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4128 
4129     /* add local non-zero vals of this proc's seqmat into ba */
4130     anzi = ai[arow+1] - ai[arow];
4131     aj   = a->j + ai[arow];
4132     aa   = a->a + ai[arow];
4133     nextaj = 0;
4134     for (j=0; nextaj<anzi; j++){
4135       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4136         ba_i[j] += aa[nextaj++];
4137       }
4138     }
4139 
4140     /* add received vals into ba */
4141     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4142       /* i-th row */
4143       if (i == *nextrow[k]) {
4144         anzi = *(nextai[k]+1) - *nextai[k];
4145         aj   = buf_rj[k] + *(nextai[k]);
4146         aa   = abuf_r[k] + *(nextai[k]);
4147         nextaj = 0;
4148         for (j=0; nextaj<anzi; j++){
4149           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4150             ba_i[j] += aa[nextaj++];
4151           }
4152         }
4153         nextrow[k]++; nextai[k]++;
4154       }
4155     }
4156     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4157   }
4158   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4159   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4160 
4161   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4162   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4163   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4164   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 #undef __FUNCT__
4169 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4170 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4171 {
4172   PetscErrorCode       ierr;
4173   Mat                  B_mpi;
4174   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4175   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4176   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4177   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4178   PetscInt             len,proc,*dnz,*onz;
4179   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4180   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4181   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4182   MPI_Status           *status;
4183   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4184   PetscBT              lnkbt;
4185   Mat_Merge_SeqsToMPI  *merge;
4186   PetscContainer       container;
4187 
4188   PetscFunctionBegin;
4189   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4190 
4191   /* make sure it is a PETSc comm */
4192   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4193   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4195 
4196   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4197   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4198 
4199   /* determine row ownership */
4200   /*---------------------------------------------------------*/
4201   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4202   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4203   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4204   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4205   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4206   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4207   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4208 
4209   m      = merge->rowmap->n;
4210   M      = merge->rowmap->N;
4211   owners = merge->rowmap->range;
4212 
4213   /* determine the number of messages to send, their lengths */
4214   /*---------------------------------------------------------*/
4215   len_s  = merge->len_s;
4216 
4217   len = 0;  /* length of buf_si[] */
4218   merge->nsend = 0;
4219   for (proc=0; proc<size; proc++){
4220     len_si[proc] = 0;
4221     if (proc == rank){
4222       len_s[proc] = 0;
4223     } else {
4224       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4225       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4226     }
4227     if (len_s[proc]) {
4228       merge->nsend++;
4229       nrows = 0;
4230       for (i=owners[proc]; i<owners[proc+1]; i++){
4231         if (ai[i+1] > ai[i]) nrows++;
4232       }
4233       len_si[proc] = 2*(nrows+1);
4234       len += len_si[proc];
4235     }
4236   }
4237 
4238   /* determine the number and length of messages to receive for ij-structure */
4239   /*-------------------------------------------------------------------------*/
4240   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4241   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4242 
4243   /* post the Irecv of j-structure */
4244   /*-------------------------------*/
4245   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4246   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4247 
4248   /* post the Isend of j-structure */
4249   /*--------------------------------*/
4250   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4251   sj_waits = si_waits + merge->nsend;
4252 
4253   for (proc=0, k=0; proc<size; proc++){
4254     if (!len_s[proc]) continue;
4255     i = owners[proc];
4256     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4257     k++;
4258   }
4259 
4260   /* receives and sends of j-structure are complete */
4261   /*------------------------------------------------*/
4262   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4263   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4264 
4265   /* send and recv i-structure */
4266   /*---------------------------*/
4267   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4268   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4269 
4270   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4271   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4272   for (proc=0,k=0; proc<size; proc++){
4273     if (!len_s[proc]) continue;
4274     /* form outgoing message for i-structure:
4275          buf_si[0]:                 nrows to be sent
4276                [1:nrows]:           row index (local to the receiving process)
4277                [nrows+1:2*nrows+1]: i-structure index
4278     */
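    /* e.g. (hypothetical) two nonempty local rows 3 and 7 with 4 and 2 nonzeros give buf_si = {2, 3, 7, 0, 4, 6} */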
4279     /*-------------------------------------------*/
4280     nrows = len_si[proc]/2 - 1;
4281     buf_si_i    = buf_si + nrows+1;
4282     buf_si[0]   = nrows;
4283     buf_si_i[0] = 0;
4284     nrows = 0;
4285     for (i=owners[proc]; i<owners[proc+1]; i++){
4286       anzi = ai[i+1] - ai[i];
4287       if (anzi) {
4288         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4289         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4290         nrows++;
4291       }
4292     }
4293     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4294     k++;
4295     buf_si += len_si[proc];
4296   }
4297 
4298   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4299   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4300 
4301   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4302   for (i=0; i<merge->nrecv; i++){
4303     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4304   }
4305 
4306   ierr = PetscFree(len_si);CHKERRQ(ierr);
4307   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4308   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4309   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4310   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4311   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4312   ierr = PetscFree(status);CHKERRQ(ierr);
4313 
4314   /* compute a local seq matrix in each processor */
4315   /*----------------------------------------------*/
4316   /* allocate bi array and free space for accumulating nonzero column info */
4317   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4318   bi[0] = 0;
4319 
4320   /* create and initialize a linked list */
4321   nlnk = N+1;
4322   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4323 
4324   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4325   len = 0;
4326   len  = ai[owners[rank+1]] - ai[owners[rank]];
4327   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4328   current_space = free_space;
4329 
4330   /* determine symbolic info for each local row */
4331   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4332   nextrow = buf_ri_k + merge->nrecv;
4333   nextai  = nextrow + merge->nrecv;
4334   for (k=0; k<merge->nrecv; k++){
4335     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4336     nrows = *buf_ri_k[k];
4337     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4338     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4339   }
4340 
4341   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4342   len = 0;
4343   for (i=0;i<m;i++) {
4344     bnzi   = 0;
4345     /* add local non-zero cols of this proc's seqmat into lnk */
4346     arow   = owners[rank] + i;
4347     anzi   = ai[arow+1] - ai[arow];
4348     aj     = a->j + ai[arow];
4349     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4350     bnzi += nlnk;
4351     /* add received col data into lnk */
4352     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4353       if (i == *nextrow[k]) { /* i-th row */
4354         anzi = *(nextai[k]+1) - *nextai[k];
4355         aj   = buf_rj[k] + *nextai[k];
4356         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4357         bnzi += nlnk;
4358         nextrow[k]++; nextai[k]++;
4359       }
4360     }
4361     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4362 
4363     /* if free space is not available, make more free space */
4364     if (current_space->local_remaining<bnzi) {
4365       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4366       nspacedouble++;
4367     }
4368     /* copy data into free space, then initialize lnk */
4369     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4370     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4371 
4372     current_space->array           += bnzi;
4373     current_space->local_used      += bnzi;
4374     current_space->local_remaining -= bnzi;
4375 
4376     bi[i+1] = bi[i] + bnzi;
4377   }
4378 
4379   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4380 
4381   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4382   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4383   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4384 
4385   /* create symbolic parallel matrix B_mpi */
4386   /*---------------------------------------*/
4387   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4388   if (n==PETSC_DECIDE) {
4389     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4390   } else {
4391     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4392   }
4393   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4394   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4395   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4396 
4397   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4398   B_mpi->assembled     = PETSC_FALSE;
4399   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4400   merge->bi            = bi;
4401   merge->bj            = bj;
4402   merge->buf_ri        = buf_ri;
4403   merge->buf_rj        = buf_rj;
4404   merge->coi           = PETSC_NULL;
4405   merge->coj           = PETSC_NULL;
4406   merge->owners_co     = PETSC_NULL;
4407 
4408   /* attach the supporting struct to B_mpi for reuse */
4409   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4410   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4411   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4412   *mpimat = B_mpi;
4413 
4414   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4415   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4416   PetscFunctionReturn(0);
4417 }
4418 
4419 #undef __FUNCT__
4420 #define __FUNCT__ "MatMerge_SeqsToMPI"
4421 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4422 {
4423   PetscErrorCode   ierr;
4424 
4425   PetscFunctionBegin;
4426   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4427   if (scall == MAT_INITIAL_MATRIX){
4428     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4429   }
4430   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4431   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4432   PetscFunctionReturn(0);
4433 }
4434 
4435 #undef __FUNCT__
4436 #define __FUNCT__ "MatGetLocalMat"
4437 /*@
4438      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel MPIAIJ matrix
4439 
4440     Not Collective
4441 
4442    Input Parameters:
4443 +    A - the matrix
4444 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4445 
4446    Output Parameter:
4447 .    A_loc - the local sequential matrix generated
4448 
4449     Level: developer
4450 
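   Example Usage:
     A minimal sketch (A is assumed to be an assembled MPIAIJ matrix; the name A_loc is illustrative):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
     The second call refreshes the values of A_loc after the numerical values of A have changed.
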
4451 @*/
4452 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4453 {
4454   PetscErrorCode  ierr;
4455   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4456   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4457   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4458   MatScalar       *aa=a->a,*ba=b->a,*cam;
4459   PetscScalar     *ca;
4460   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4461   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4462 
4463   PetscFunctionBegin;
4464   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4465   if (scall == MAT_INITIAL_MATRIX){
4466     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4467     ci[0] = 0;
4468     for (i=0; i<am; i++){
4469       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4470     }
4471     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4472     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4473     k = 0;
4474     for (i=0; i<am; i++) {
4475       ncols_o = bi[i+1] - bi[i];
4476       ncols_d = ai[i+1] - ai[i];
4477       /* off-diagonal portion of A */
4478       for (jo=0; jo<ncols_o; jo++) {
4479         col = cmap[*bj];
4480         if (col >= cstart) break;
4481         cj[k]   = col; bj++;
4482         ca[k++] = *ba++;
4483       }
4484       /* diagonal portion of A */
4485       for (j=0; j<ncols_d; j++) {
4486         cj[k]   = cstart + *aj++;
4487         ca[k++] = *aa++;
4488       }
4489       /* off-diagonal portion of A */
4490       for (j=jo; j<ncols_o; j++) {
4491         cj[k]   = cmap[*bj++];
4492         ca[k++] = *ba++;
4493       }
4494     }
4495     /* put together the new matrix */
4496     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4497     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4498     /* Since these are PETSc arrays, change flags to free them as necessary. */
4499     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4500     mat->free_a  = PETSC_TRUE;
4501     mat->free_ij = PETSC_TRUE;
4502     mat->nonew   = 0;
4503   } else if (scall == MAT_REUSE_MATRIX){
4504     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4505     ci = mat->i; cj = mat->j; cam = mat->a;
4506     for (i=0; i<am; i++) {
4507       /* off-diagonal portion of A */
4508       ncols_o = bi[i+1] - bi[i];
4509       for (jo=0; jo<ncols_o; jo++) {
4510         col = cmap[*bj];
4511         if (col >= cstart) break;
4512         *cam++ = *ba++; bj++;
4513       }
4514       /* diagonal portion of A */
4515       ncols_d = ai[i+1] - ai[i];
4516       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4517       /* off-diagonal portion of A */
4518       for (j=jo; j<ncols_o; j++) {
4519         *cam++ = *ba++; bj++;
4520       }
4521     }
4522   } else {
4523     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4524   }
4525 
4526   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4527   PetscFunctionReturn(0);
4528 }
4529 
4530 #undef __FUNCT__
4531 #define __FUNCT__ "MatGetLocalMatCondensed"
4532 /*@C
4533      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of a parallel MPIAIJ matrix
4534 
4535     Not Collective
4536 
4537    Input Parameters:
4538 +    A - the matrix
4539 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4540 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4541 
4542    Output Parameter:
4543 .    A_loc - the local sequential matrix generated
4544 
4545     Level: developer
4546 
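   Example Usage:
     A minimal sketch (A is an assembled MPIAIJ matrix; row and col are passed as PETSC_NULL so the
     routine extracts the local row range and the nonzero columns itself):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
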
4547 @*/
4548 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4549 {
4550   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4551   PetscErrorCode    ierr;
4552   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4553   IS                isrowa,iscola;
4554   Mat               *aloc;
4555 
4556   PetscFunctionBegin;
4557   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4558   if (!row){
4559     start = A->rmap->rstart; end = A->rmap->rend;
4560     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4561   } else {
4562     isrowa = *row;
4563   }
4564   if (!col){
4565     start = A->cmap->rstart;
4566     cmap  = a->garray;
4567     nzA   = a->A->cmap->n;
4568     nzB   = a->B->cmap->n;
4569     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4570     ncols = 0;
4571     for (i=0; i<nzB; i++) {
4572       if (cmap[i] < start) idx[ncols++] = cmap[i];
4573       else break;
4574     }
4575     imark = i;
4576     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4577     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4578     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4579     ierr = PetscFree(idx);CHKERRQ(ierr);
4580   } else {
4581     iscola = *col;
4582   }
4583   if (scall != MAT_INITIAL_MATRIX){
4584     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4585     aloc[0] = *A_loc;
4586   }
4587   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4588   *A_loc = aloc[0];
4589   ierr = PetscFree(aloc);CHKERRQ(ierr);
4590   if (!row){
4591     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4592   }
4593   if (!col){
4594     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4595   }
4596   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4597   PetscFunctionReturn(0);
4598 }
4599 
4600 #undef __FUNCT__
4601 #define __FUNCT__ "MatGetBrowsOfAcols"
4602 /*@C
4603     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4604 
4605     Collective on Mat
4606 
4607    Input Parameters:
4608 +    A,B - the matrices in mpiaij format
4609 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4610 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4611 
4612    Output Parameters:
4613 +    rowb, colb - index sets of the rows and columns of B that were extracted
4614 .    brstart - row index of B_seq starting at which the next B->rmap->n rows are taken from B's local rows
4615 -    B_seq - the sequential matrix generated
4616 
4617     Level: developer
4618 
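   Example Usage:
     A minimal sketch (A and B are assembled MPIAIJ matrices with compatible local sizes; the
     index sets created by the first call are reused by the second call and destroyed by the caller):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
.ve
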
4619 @*/
4620 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4621 {
4622   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4623   PetscErrorCode    ierr;
4624   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4625   IS                isrowb,iscolb;
4626   Mat               *bseq;
4627 
4628   PetscFunctionBegin;
4629   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4630     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4631   }
4632   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4633 
4634   if (scall == MAT_INITIAL_MATRIX){
4635     start = A->cmap->rstart;
4636     cmap  = a->garray;
4637     nzA   = a->A->cmap->n;
4638     nzB   = a->B->cmap->n;
4639     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4640     ncols = 0;
4641     for (i=0; i<nzB; i++) {  /* row < local row index */
4642       if (cmap[i] < start) idx[ncols++] = cmap[i];
4643       else break;
4644     }
4645     imark = i;
4646     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4647     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4648     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4649     ierr = PetscFree(idx);CHKERRQ(ierr);
4650     *brstart = imark;
4651     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4652   } else {
4653     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4654     isrowb = *rowb; iscolb = *colb;
4655     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4656     bseq[0] = *B_seq;
4657   }
4658   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4659   *B_seq = bseq[0];
4660   ierr = PetscFree(bseq);CHKERRQ(ierr);
4661   if (!rowb){
4662     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4663   } else {
4664     *rowb = isrowb;
4665   }
4666   if (!colb){
4667     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4668   } else {
4669     *colb = iscolb;
4670   }
4671   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4672   PetscFunctionReturn(0);
4673 }
4674 
4675 #undef __FUNCT__
4676 #define __FUNCT__ "MatGetBrowsOfAoCols"
4677 /*@C
4678     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4679     of the OFF-DIAGONAL portion of the local part of A
4680 
4681     Collective on Mat
4682 
4683    Input Parameters:
4684 +    A,B - the matrices in mpiaij format
4685 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4686 .    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4687 -    bufa_ptr - buffer for the matrix values being sent, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4688 
4689    Output Parameter:
4690 .    B_oth - the sequential matrix generated
4691 
4692     Level: developer
4693 
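   Example Usage:
     A minimal sketch (A and B are assembled MPIAIJ matrices with compatible local sizes; startsj
     and bufa are kept between calls so MAT_REUSE_MATRIX can skip the index communication; freeing
     them with PetscFree() when done is the caller's responsibility here, an assumption of this sketch):
.vb
     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
.ve
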
4694 @*/
4695 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4696 {
4697   VecScatter_MPI_General *gen_to,*gen_from;
4698   PetscErrorCode         ierr;
4699   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4700   Mat_SeqAIJ             *b_oth;
4701   VecScatter             ctx=a->Mvctx;
4702   MPI_Comm               comm=((PetscObject)ctx)->comm;
4703   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4704   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4705   PetscScalar            *rvalues,*svalues;
4706   MatScalar              *b_otha,*bufa,*bufA;
4707   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4708   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4709   MPI_Status             *sstatus,rstatus;
4710   PetscMPIInt            jj;
4711   PetscInt               *cols,sbs,rbs;
4712   PetscScalar            *vals;
4713 
4714   PetscFunctionBegin;
4715   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4716     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4717   }
4718   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4719   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4720 
4721   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4722   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4723   rvalues  = gen_from->values; /* holds the length of receiving row */
4724   svalues  = gen_to->values;   /* holds the length of sending row */
4725   nrecvs   = gen_from->n;
4726   nsends   = gen_to->n;
4727 
4728   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4729   srow     = gen_to->indices;   /* local row index to be sent */
4730   sstarts  = gen_to->starts;
4731   sprocs   = gen_to->procs;
4732   sstatus  = gen_to->sstatus;
4733   sbs      = gen_to->bs;
4734   rstarts  = gen_from->starts;
4735   rprocs   = gen_from->procs;
4736   rbs      = gen_from->bs;
4737 
4738   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4739   if (scall == MAT_INITIAL_MATRIX){
4740     /* i-array */
4741     /*---------*/
4742     /*  post receives */
4743     for (i=0; i<nrecvs; i++){
4744       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4745       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4746       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4747     }
4748 
4749     /* pack the outgoing message */
4750     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4751     rstartsj = sstartsj + nsends +1;
4752     sstartsj[0] = 0;  rstartsj[0] = 0;
4753     len = 0; /* total length of j or a array to be sent */
4754     k = 0;
4755     for (i=0; i<nsends; i++){
4756       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4757       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4758       for (j=0; j<nrows; j++) {
4759         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4760         for (l=0; l<sbs; l++){
4761           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4762           rowlen[j*sbs+l] = ncols;
4763           len += ncols;
4764           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4765         }
4766         k++;
4767       }
4768       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4769       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4770     }
4771     /* recvs and sends of i-array are completed */
4772     i = nrecvs;
4773     while (i--) {
4774       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4775     }
4776     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4777 
4778     /* allocate buffers for sending j and a arrays */
4779     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4780     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4781 
4782     /* create i-array of B_oth */
4783     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4784     b_othi[0] = 0;
4785     len = 0; /* total length of j or a array to be received */
4786     k = 0;
4787     for (i=0; i<nrecvs; i++){
4788       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4789       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4790       for (j=0; j<nrows; j++) {
4791         b_othi[k+1] = b_othi[k] + rowlen[j];
4792         len += rowlen[j]; k++;
4793       }
4794       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4795     }
4796 
4797     /* allocate space for j and a arrays of B_oth */
4798     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4799     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4800 
4801     /* j-array */
4802     /*---------*/
4803     /*  post receives of j-array */
4804     for (i=0; i<nrecvs; i++){
4805       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4806       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4807     }
4808 
4809     /* pack the outgoing message j-array */
4810     k = 0;
4811     for (i=0; i<nsends; i++){
4812       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4813       bufJ = bufj+sstartsj[i];
4814       for (j=0; j<nrows; j++) {
4815         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4816         for (ll=0; ll<sbs; ll++){
4817           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4818           for (l=0; l<ncols; l++){
4819             *bufJ++ = cols[l];
4820           }
4821           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4822         }
4823       }
4824       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4825     }
4826 
4827     /* recvs and sends of j-array are completed */
4828     i = nrecvs;
4829     while (i--) {
4830       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4831     }
4832     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4833   } else if (scall == MAT_REUSE_MATRIX){
4834     sstartsj = *startsj;
4835     rstartsj = sstartsj + nsends +1;
4836     bufa     = *bufa_ptr;
4837     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4838     b_otha   = b_oth->a;
4839   } else {
4840     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4841   }
4842 
4843   /* a-array */
4844   /*---------*/
4845   /*  post receives of a-array */
4846   for (i=0; i<nrecvs; i++){
4847     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4848     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4849   }
4850 
4851   /* pack the outgoing message a-array */
4852   k = 0;
4853   for (i=0; i<nsends; i++){
4854     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4855     bufA = bufa+sstartsj[i];
4856     for (j=0; j<nrows; j++) {
4857       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4858       for (ll=0; ll<sbs; ll++){
4859         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4860         for (l=0; l<ncols; l++){
4861           *bufA++ = vals[l];
4862         }
4863         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4864       }
4865     }
4866     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4867   }
4868   /* recvs and sends of a-array are completed */
4869   i = nrecvs;
4870   while (i--) {
4871     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4872   }
4873   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4874   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4875 
4876   if (scall == MAT_INITIAL_MATRIX){
4877     /* put together the new matrix */
4878     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4879 
4880     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4881     /* Since these are PETSc arrays, change flags to free them as necessary. */
4882     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4883     b_oth->free_a  = PETSC_TRUE;
4884     b_oth->free_ij = PETSC_TRUE;
4885     b_oth->nonew   = 0;
4886 
4887     ierr = PetscFree(bufj);CHKERRQ(ierr);
4888     if (!startsj || !bufa_ptr){
4889       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4890       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4891     } else {
4892       *startsj  = sstartsj;
4893       *bufa_ptr = bufa;
4894     }
4895   }
4896   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4897   PetscFunctionReturn(0);
4898 }
4899 
4900 #undef __FUNCT__
4901 #define __FUNCT__ "MatGetCommunicationStructs"
4902 /*@C
4903   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4904 
4905   Not Collective
4906 
4907   Input Parameters:
4908 . A - The matrix in mpiaij format
4909 
4910   Output Parameters:
4911 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4912 . colmap - A map from global column index to local index into lvec
4913 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4914 
4915   Level: developer
4916 
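  Example Usage:
    A minimal sketch (A is an assembled MPIAIJ matrix; the returned objects are owned by A and
    must not be destroyed by the caller):
.vb
    Vec        lvec;
    VecScatter mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
.ve
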
4917 @*/
4918 #if defined (PETSC_USE_CTABLE)
4919 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4920 #else
4921 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4922 #endif
4923 {
4924   Mat_MPIAIJ *a;
4925 
4926   PetscFunctionBegin;
4927   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4928   PetscValidPointer(lvec, 2);
4929   PetscValidPointer(colmap, 3);
4930   PetscValidPointer(multScatter, 4);
4931   a = (Mat_MPIAIJ *) A->data;
4932   if (lvec) *lvec = a->lvec;
4933   if (colmap) *colmap = a->colmap;
4934   if (multScatter) *multScatter = a->Mvctx;
4935   PetscFunctionReturn(0);
4936 }
4937 
4938 EXTERN_C_BEGIN
4939 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4940 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4941 EXTERN_C_END
4942 
4943 #undef __FUNCT__
4944 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4945 /*
4946     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product directly is untenable
4947 
4948                n                       p                          p
4949         (              )       (              )         (                  )
4950       m (      A       )  *  n (       B      )   =   m (         C        )
4951         (              )       (              )         (                  )
4952 
4953 */
4954 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4955 {
4956   PetscErrorCode     ierr;
4957   Mat                At,Bt,Ct;
4958 
4959   PetscFunctionBegin;
4960   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4961   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4962   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4963   ierr = MatDestroy(At);CHKERRQ(ierr);
4964   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4965   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4966   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4967   PetscFunctionReturn(0);
4968 }
4969 
4970 #undef __FUNCT__
4971 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4972 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4973 {
4974   PetscErrorCode ierr;
4975   PetscInt       m=A->rmap->n,n=B->cmap->n;
4976   Mat            Cmat;
4977 
4978   PetscFunctionBegin;
4979   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4980   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4981   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4982   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4983   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4984   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4985   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4986   *C   = Cmat;
4987   PetscFunctionReturn(0);
4988 }
4989 
4990 /* ----------------------------------------------------------------*/
4991 #undef __FUNCT__
4992 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4993 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4994 {
4995   PetscErrorCode ierr;
4996 
4997   PetscFunctionBegin;
4998   if (scall == MAT_INITIAL_MATRIX){
4999     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5000   }
5001   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5002   PetscFunctionReturn(0);
5003 }
5004 
5005 EXTERN_C_BEGIN
5006 #if defined(PETSC_HAVE_MUMPS)
5007 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5008 #endif
5009 #if defined(PETSC_HAVE_PASTIX)
5010 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5011 #endif
5012 #if defined(PETSC_HAVE_SUPERLU_DIST)
5013 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5014 #endif
5015 #if defined(PETSC_HAVE_SPOOLES)
5016 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5017 #endif
5018 EXTERN_C_END
5019 
5020 /*MC
5021    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5022 
5023    Options Database Keys:
5024 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5025 
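  Example Usage:
    A minimal sketch of selecting this type programmatically (the sizes and preallocation
    numbers are illustrative):
.vb
    Mat A;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
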
5026   Level: beginner
5027 
5028 .seealso: MatCreateMPIAIJ()
5029 M*/
5030 
5031 EXTERN_C_BEGIN
5032 #undef __FUNCT__
5033 #define __FUNCT__ "MatCreate_MPIAIJ"
5034 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5035 {
5036   Mat_MPIAIJ     *b;
5037   PetscErrorCode ierr;
5038   PetscMPIInt    size;
5039 
5040   PetscFunctionBegin;
5041   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5042 
5043   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5044   B->data         = (void*)b;
5045   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5046   B->rmap->bs     = 1;
5047   B->assembled    = PETSC_FALSE;
5048   B->mapping      = 0;
5049 
5050   B->insertmode   = NOT_SET_VALUES;
5051   b->size         = size;
5052   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5053 
5054   /* build cache for off array entries formed */
5055   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5056   b->donotstash  = PETSC_FALSE;
5057   b->colmap      = 0;
5058   b->garray      = 0;
5059   b->roworiented = PETSC_TRUE;
5060 
5061   /* stuff used for matrix vector multiply */
5062   b->lvec      = PETSC_NULL;
5063   b->Mvctx     = PETSC_NULL;
5064 
5065   /* stuff for MatGetRow() */
5066   b->rowindices   = 0;
5067   b->rowvalues    = 0;
5068   b->getrowactive = PETSC_FALSE;
5069 
5070 #if defined(PETSC_HAVE_SPOOLES)
5071   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5072                                      "MatGetFactor_mpiaij_spooles",
5073                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5074 #endif
5075 #if defined(PETSC_HAVE_MUMPS)
5076   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5077                                      "MatGetFactor_mpiaij_mumps",
5078                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5079 #endif
5080 #if defined(PETSC_HAVE_PASTIX)
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5082 					   "MatGetFactor_mpiaij_pastix",
5083 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5084 #endif
5085 #if defined(PETSC_HAVE_SUPERLU_DIST)
5086   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5087                                      "MatGetFactor_mpiaij_superlu_dist",
5088                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5089 #endif
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5091                                      "MatStoreValues_MPIAIJ",
5092                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5094                                      "MatRetrieveValues_MPIAIJ",
5095                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5097 				     "MatGetDiagonalBlock_MPIAIJ",
5098                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5100 				     "MatIsTranspose_MPIAIJ",
5101 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5103 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5104 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5106 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5107 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5109 				     "MatDiagonalScaleLocal_MPIAIJ",
5110 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5112                                      "MatConvert_MPIAIJ_MPICSRPERM",
5113                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5115                                      "MatConvert_MPIAIJ_MPICRL",
5116                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5118                                      "MatMatMult_MPIDense_MPIAIJ",
5119                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5120   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5121                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5122                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5123   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5124                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5125                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5126   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5127   PetscFunctionReturn(0);
5128 }
5129 EXTERN_C_END
5130 
5131 #undef __FUNCT__
5132 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5133 /*@
5134      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5135          and "off-diagonal" part of the matrix in CSR format.
5136 
5137    Collective on MPI_Comm
5138 
5139    Input Parameters:
5140 +  comm - MPI communicator
5141 .  m - number of local rows (Cannot be PETSC_DECIDE)
5142 .  n - This value should be the same as the local size used in creating the
5143        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
5144        calculated if N is given) For square matrices n is almost always m.
5145 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5146 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5147 .   i - row indices for "diagonal" portion of matrix
5148 .   j - column indices
5149 .   a - matrix values
5150 .   oi - row indices for "off-diagonal" portion of matrix
5151 .   oj - column indices
5152 -   oa - matrix values
5153 
5154    Output Parameter:
5155 .   mat - the matrix
5156 
5157    Level: advanced
5158 
5159    Notes:
5160        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5161 
5162        The i and j indices are 0 based
5163 
5164        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5165 
5166        This sets local rows and cannot be used to set off-processor values.
5167 
5168        You cannot later use MatSetValues() to change values in this matrix.
5169 
5170 .keywords: matrix, aij, compressed row, sparse, parallel
5171 
5172 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5173           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5174 @*/
5175 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5176 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5177 {
5178   PetscErrorCode ierr;
5179   Mat_MPIAIJ     *maij;
5180 
5181   PetscFunctionBegin;
5182   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5183   if (i[0]) {
5184     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5185   }
5186   if (oi[0]) {
5187     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5188   }
5189   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5190   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5191   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5192   maij = (Mat_MPIAIJ*) (*mat)->data;
5193   maij->donotstash     = PETSC_TRUE;
5194   (*mat)->preallocated = PETSC_TRUE;
5195 
5196   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5197   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5198   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5199   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5200 
5201   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5202   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5203 
5204   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5206   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5207   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5208 
5209   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5210   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5211   PetscFunctionReturn(0);
5212 }
5213 
5214 /*
5215     Special version for direct calls from Fortran
5216 */
5217 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5218 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5219 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5220 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5221 #endif
5222 
5223 /* Change these macros so can be used in void function */
5224 #undef CHKERRQ
5225 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5226 #undef SETERRQ2
5227 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5228 #undef SETERRQ
5229 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5230 
5231 EXTERN_C_BEGIN
5232 #undef __FUNCT__
5233 #define __FUNCT__ "matsetvaluesmpiaij_"
5234 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5235 {
5236   Mat             mat = *mmat;
5237   PetscInt        m = *mm, n = *mn;
5238   InsertMode      addv = *maddv;
5239   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5240   PetscScalar     value;
5241   PetscErrorCode  ierr;
5242 
5243   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5244   if (mat->insertmode == NOT_SET_VALUES) {
5245     mat->insertmode = addv;
5246   }
5247 #if defined(PETSC_USE_DEBUG)
5248   else if (mat->insertmode != addv) {
5249     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5250   }
5251 #endif
5252   {
5253   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5254   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5255   PetscTruth      roworiented = aij->roworiented;
5256 
5257   /* Some Variables required in the macro */
5258   Mat             A = aij->A;
5259   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5260   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5261   MatScalar       *aa = a->a;
5262   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5263   Mat             B = aij->B;
5264   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5265   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5266   MatScalar       *ba = b->a;
5267 
5268   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5269   PetscInt        nonew = a->nonew;
5270   MatScalar       *ap1,*ap2;
5271 
5272   PetscFunctionBegin;
5273   for (i=0; i<m; i++) {
5274     if (im[i] < 0) continue;
5275 #if defined(PETSC_USE_DEBUG)
5276     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5277 #endif
5278     if (im[i] >= rstart && im[i] < rend) {
5279       row      = im[i] - rstart;
5280       lastcol1 = -1;
5281       rp1      = aj + ai[row];
5282       ap1      = aa + ai[row];
5283       rmax1    = aimax[row];
5284       nrow1    = ailen[row];
5285       low1     = 0;
5286       high1    = nrow1;
5287       lastcol2 = -1;
5288       rp2      = bj + bi[row];
5289       ap2      = ba + bi[row];
5290       rmax2    = bimax[row];
5291       nrow2    = bilen[row];
5292       low2     = 0;
5293       high2    = nrow2;
5294 
5295       for (j=0; j<n; j++) {
5296         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5297         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5298         if (in[j] >= cstart && in[j] < cend){
5299           col = in[j] - cstart;
5300           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5301         } else if (in[j] < 0) continue;
5302 #if defined(PETSC_USE_DEBUG)
5303         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5304 #endif
5305         else {
5306           if (mat->was_assembled) {
5307             if (!aij->colmap) {
5308               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5309             }
5310 #if defined (PETSC_USE_CTABLE)
5311             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5312 	    col--;
5313 #else
5314             col = aij->colmap[in[j]] - 1;
5315 #endif
5316             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5317               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5318               col =  in[j];
5319               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5320               B = aij->B;
5321               b = (Mat_SeqAIJ*)B->data;
5322               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5323               rp2      = bj + bi[row];
5324               ap2      = ba + bi[row];
5325               rmax2    = bimax[row];
5326               nrow2    = bilen[row];
5327               low2     = 0;
5328               high2    = nrow2;
5329               bm       = aij->B->rmap->n;
5330               ba = b->a;
5331             }
5332           } else col = in[j];
5333           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5334         }
5335       }
5336     } else {
5337       if (!aij->donotstash) {
5338         if (roworiented) {
5339           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5340         } else {
5341           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5342         }
5343       }
5344     }
5345   }}
5346   PetscFunctionReturnVoid();
5347 }
5348 EXTERN_C_END
5349 
5350