xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision da518b7012a3f6741cd7d2f8b91a37c7afd18123)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
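/*
   Illustrative sketch (added commentary, not part of the original source) of how the
   routine above might be driven: rank 0 holds the global SeqAIJ matrix, every rank
   passes the number of rows it should own, and a second call with MAT_REUSE_MATRIX moves
   only the numerical values into the already-distributed matrix.  The helper name is
   hypothetical and the block is disabled so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleDistributeUsage(MPI_Comm comm,Mat gmat,PetscInt mlocal)
{
  PetscErrorCode ierr;
  Mat            dmat = PETSC_NULL;

  PetscFunctionBegin;
  /* first call builds the parallel matrix and its nonzero structure */
  ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
  /* after gmat's values change on rank 0, redistribute just the values */
  ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
  ierr = MatDestroy(dmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif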
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each processor
181 has an order-N integer array) but is fast to access.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
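/*
   Illustrative sketch (added commentary, not part of the original source) of how the
   colmap built above is consulted elsewhere in this file: it translates a global column
   index into a local column index of the off-diagonal block B, with -1 meaning the
   column is not present on this process.  The helper name is hypothetical and the block
   is disabled so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!aij->colmap) {ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                       /* the table stores local index + 1, so 0 means absent */
#else
  *lcol = aij->colmap[gcol] - 1;   /* the array stores local index + 1, so 0 means absent */
#endif
  PetscFunctionReturn(0);
}
#endif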
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
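/*
   Summary of the two insertion macros above (added commentary, not part of the original
   source): each macro first narrows the search window with a short binary search over the
   row's sorted column indices, then scans linearly; an existing entry is added to or
   overwritten according to addv, a zero value may be dropped when ignorezeroentries is
   set, nonew == 1 silently skips a new location while nonew == -1 raises an error, and
   otherwise the row is reallocated if necessary and the later entries are shifted up to
   make room for the new (col,value) pair.
*/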
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
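/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above (added commentary, not part of the
   original source): the caller supplies the complete local row in global column order, so
   the values split into three consecutive pieces,

       v = [ columns left of the diagonal block | diagonal block | columns right of it ]
             (first l entries of B's row)          (A's row)        (remainder of B's row)

   which is exactly the order in which the three PetscMemcpy() calls consume them.
*/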
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some variables required in the macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors use the same insert mode, either INSERT_VALUES or ADD_VALUES (see the note after this routine) */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
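/*
   Note on the reduction in MatAssemblyBegin_MPIAIJ() above (added commentary, not part of
   the original source): the bitwise OR of every rank's InsertMode equals
   (ADD_VALUES|INSERT_VALUES) only when some ranks inserted while others added, because
   that combined value differs from either mode alone; a rank that set no entries does not
   perturb the OR and simply adopts the common mode from the reduction result.
*/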
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled, if so we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled thus we can skip this stuff
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscBool      found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   if (A->nooffproczerorows) {
606     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
607     nrecvs = nsends;
608     nmax   = N;
609   } else {
610     /* inform other processors of the number of messages and max length */
611     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
612   }
613 
614   /* post receives:   */
615   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
616   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
617   for (i=0; i<nrecvs; i++) {
618     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
619   }
620 
621   /* do sends:
622       1) starts[i] gives the starting index in svalues for stuff going to
623          the ith processor
624   */
625   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
626   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
627   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
628   starts[0] = 0;
629   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   for (i=0; i<N; i++) {
631     svalues[starts[owner[i]]++] = rows[i];
632   }
633 
634   starts[0] = 0;
635   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
636   count = 0;
637   for (i=0; i<size; i++) {
638     if (nprocs[2*i+1]) {
639       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
640     }
641   }
642   ierr = PetscFree(starts);CHKERRQ(ierr);
643 
644   base = owners[rank];
645 
646   /*  wait on receives */
647   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
648   count  = nrecvs; slen = 0;
649   while (count) {
650     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
651     /* unpack receives into our local space */
652     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
653     source[imdex]  = recv_status.MPI_SOURCE;
654     lens[imdex]    = n;
655     slen          += n;
656     count--;
657   }
658   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
659 
660   /* move the data into the send scatter */
661   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
662   count = 0;
663   for (i=0; i<nrecvs; i++) {
664     values = rvalues + i*nmax;
665     for (j=0; j<lens[i]; j++) {
666       lrows[count++] = values[j] - base;
667     }
668   }
669   ierr = PetscFree(rvalues);CHKERRQ(ierr);
670   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
671   ierr = PetscFree(owner);CHKERRQ(ierr);
672   ierr = PetscFree(nprocs);CHKERRQ(ierr);
673 
674   /* actually zap the local rows */
675   /*
676         Zero the required rows. If the "diagonal block" of the matrix
677      is square and the user wishes to set the diagonal, we use separate
678      code so that MatSetValues() is not called for each diagonal entry,
679      which would allocate new memory and trigger many mallocs, slowing things down.
680 
681   */
682   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
683   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
684   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
685     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
686   } else if (diag != 0.0) {
687     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
688     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
689       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
691     }
692     for (i = 0; i < slen; i++) {
693       row  = lrows[i] + rstart;
694       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
695     }
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698   } else {
699     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
700   }
701   ierr = PetscFree(lrows);CHKERRQ(ierr);
702 
703   /* wait on sends */
704   if (nsends) {
705     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
706     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
707     ierr = PetscFree(send_status);CHKERRQ(ierr);
708   }
709   ierr = PetscFree(send_waits);CHKERRQ(ierr);
710   ierr = PetscFree(svalues);CHKERRQ(ierr);
711 
712   PetscFunctionReturn(0);
713 }
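/*
   Illustrative (hypothetical) caller of the routine above: each process passes the GLOBAL
   numbers of the rows it wants zeroed -- they need not be locally owned, since the
   scatter above routes each request to the owning process -- together with the value to
   place on the diagonal of those rows.  The block is disabled so it does not affect the
   build.
*/
#if 0
static PetscErrorCode ExampleZeroRows(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       M,rows[2],nzero = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,PETSC_NULL);CHKERRQ(ierr);
  if (!rank) {  /* rank 0 asks for the first and last global rows; the last row normally
                   lives on another process and is forwarded there by the code above */
    rows[0] = 0; rows[1] = M-1; nzero = 2;
  }
  ierr = MatZeroRows(A,nzero,rows,1.0);CHKERRQ(ierr); /* zero the rows, put 1.0 on their diagonals */
  PetscFunctionReturn(0);
}
#endif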
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatMult_MPIAIJ"
717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721   PetscInt       nt;
722 
723   PetscFunctionBegin;
724   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
725   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
726   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
728   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNCT__
734 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
735 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
736 {
737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
738   PetscErrorCode ierr;
739 
740   PetscFunctionBegin;
741   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatMultAdd_MPIAIJ"
747 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
748 {
749   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
755   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
762 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
763 {
764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
765   PetscErrorCode ierr;
766   PetscBool      merged;
767 
768   PetscFunctionBegin;
769   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
770   /* do nondiagonal part */
771   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
772   if (!merged) {
773     /* send it on its way */
774     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
775     /* do local part */
776     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
777     /* receive remote parts: note this assumes the values are not actually */
778     /* added into yy until the next line */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   } else {
781     /* do local part */
782     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
783     /* send it on its way */
784     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785     /* values actually were received in the Begin() but we need to call this nop */
786     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 EXTERN_C_BEGIN
792 #undef __FUNCT__
793 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
794 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
795 {
796   MPI_Comm       comm;
797   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
798   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
799   IS             Me,Notme;
800   PetscErrorCode ierr;
801   PetscInt       M,N,first,last,*notme,i;
802   PetscMPIInt    size;
803 
804   PetscFunctionBegin;
805 
806   /* Easy test: symmetric diagonal block */
807   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
808   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
809   if (!*f) PetscFunctionReturn(0);
810   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
812   if (size == 1) PetscFunctionReturn(0);
813 
814   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
815   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
816   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
817   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
818   for (i=0; i<first; i++) notme[i] = i;
819   for (i=last; i<M; i++) notme[i-last+first] = i;
820   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
821   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
822   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
823   Aoff = Aoffs[0];
824   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
825   Boff = Boffs[0];
826   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
828   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
829   ierr = ISDestroy(Me);CHKERRQ(ierr);
830   ierr = ISDestroy(Notme);CHKERRQ(ierr);
831 
832   PetscFunctionReturn(0);
833 }
834 EXTERN_C_END
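/*
   Illustrative (hypothetical) usage of the test implemented above: MatIsTranspose()
   compares two parallel matrices and returns PETSC_TRUE in f when B equals the transpose
   of A to within the given tolerance.  The block is disabled so it does not affect the
   build.
*/
#if 0
static PetscErrorCode ExampleIsTranspose(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscBool      f;

  PetscFunctionBegin;
  ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);
  if (f) {ierr = PetscPrintf(((PetscObject)A)->comm,"B is the transpose of A\n");CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
#endif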
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
838 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
839 {
840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode ierr;
842 
843   PetscFunctionBegin;
844   /* do nondiagonal part */
845   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
846   /* send it on its way */
847   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
848   /* do local part */
849   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
850   /* receive remote parts */
851   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 /*
856   This only works correctly for square matrices where the subblock A->A is the
857    diagonal block
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
861 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
862 {
863   PetscErrorCode ierr;
864   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
865 
866   PetscFunctionBegin;
867   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
868   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
900   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_CLASSID;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
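/*
   Resulting binary layout (summary comment, not part of the original source): a four
   entry header (MAT_FILE_CLASSID, number of rows, number of columns, global number of
   nonzeros), followed by every row length, then every global column index, then every
   value; within each row the indices and values are emitted in increasing global column
   order by interleaving the off-diagonal block B around the diagonal block A, as done in
   the loops above.
*/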
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscBool         isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscBool  inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscBool  isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscBool  flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscBool      iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscBool      hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
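/*
   Usage sketch (illustrative only, not exercised by this file): the local relaxations
   implemented above are normally reached through PCSOR rather than by calling MatSOR()
   directly.  Assuming A is an assembled MATMPIAIJ matrix and b, x are conforming
   vectors, something like the following selects the processor-local symmetric sweep
   handled by MatSOR_MPIAIJ():

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(((PetscObject)A)->comm,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(ksp);CHKERRQ(ierr);

   True multiplicative parallel SOR is not supported, as the error branch above
   indicates; only the local sweeps and the Eisenstat variant are available.
*/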
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
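/*
   Usage sketch (illustrative): MatPermute_MPIAIJ() is reached through the generic
   MatPermute() interface.  Assuming rowp and colp are index sets describing
   permutations of the global rows and columns (e.g. built with ISCreateGeneral()):

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(Aperm);CHKERRQ(ierr);

   As coded above, 'rowp' may live on a subcommunicator (it is gathered onto the
   matrix communicator), while 'colp' is treated as a serial permutation.
*/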
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
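/*
   Usage sketch (illustrative): the five statistics reduced above are retrieved
   through MatGetInfo(), e.g.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);

   MAT_LOCAL reports this process only, MAT_GLOBAL_MAX the maximum over processes,
   and MAT_GLOBAL_SUM the sum over processes.
*/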
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SPD:
1449     A->spd_set                         = PETSC_TRUE;
1450     A->spd                             = flg;
1451     if (flg) {
1452       A->symmetric                     = PETSC_TRUE;
1453       A->structurally_symmetric        = PETSC_TRUE;
1454       A->symmetric_set                 = PETSC_TRUE;
1455       A->structurally_symmetric_set    = PETSC_TRUE;
1456     }
1457     break;
1458   case MAT_SYMMETRIC:
1459     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1460     break;
1461   case MAT_STRUCTURALLY_SYMMETRIC:
1462     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1463     break;
1464   case MAT_HERMITIAN:
1465     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1466     break;
1467   case MAT_SYMMETRY_ETERNAL:
1468     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1469     break;
1470   default:
1471     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1472   }
1473   PetscFunctionReturn(0);
1474 }
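/*
   Usage sketch (illustrative): options are forwarded to the diagonal (A) and
   off-diagonal (B) sequential blocks as appropriate, e.g.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);

   Note from the switch above that MAT_SPD also marks the matrix as symmetric and
   structurally symmetric, while MAT_SYMMETRIC, MAT_STRUCTURALLY_SYMMETRIC and
   MAT_HERMITIAN are recorded only on the local diagonal block.
*/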
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatGetRow_MPIAIJ"
1478 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1482   PetscErrorCode ierr;
1483   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1484   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1485   PetscInt       *cmap,*idx_p;
1486 
1487   PetscFunctionBegin;
1488   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1489   mat->getrowactive = PETSC_TRUE;
1490 
1491   if (!mat->rowvalues && (idx || v)) {
1492     /*
1493         allocate enough space to hold information from the longest row.
1494     */
1495     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1496     PetscInt   max = 1,tmp;
1497     for (i=0; i<matin->rmap->n; i++) {
1498       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1499       if (max < tmp) { max = tmp; }
1500     }
1501     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1502   }
1503 
1504   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1505   lrow = row - rstart;
1506 
1507   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1508   if (!v)   {pvA = 0; pvB = 0;}
1509   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1510   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1511   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1512   nztot = nzA + nzB;
1513 
1514   cmap  = mat->garray;
1515   if (v  || idx) {
1516     if (nztot) {
1517       /* Sort by increasing column numbers, assuming A and B already sorted */
1518       PetscInt imark = -1;
1519       if (v) {
1520         *v = v_p = mat->rowvalues;
1521         for (i=0; i<nzB; i++) {
1522           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1523           else break;
1524         }
1525         imark = i;
1526         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1527         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1528       }
1529       if (idx) {
1530         *idx = idx_p = mat->rowindices;
1531         if (imark > -1) {
1532           for (i=0; i<imark; i++) {
1533             idx_p[i] = cmap[cworkB[i]];
1534           }
1535         } else {
1536           for (i=0; i<nzB; i++) {
1537             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1538             else break;
1539           }
1540           imark = i;
1541         }
1542         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1543         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1544       }
1545     } else {
1546       if (idx) *idx = 0;
1547       if (v)   *v   = 0;
1548     }
1549   }
1550   *nz = nztot;
1551   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1552   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1558 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1559 {
1560   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1561 
1562   PetscFunctionBegin;
1563   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1564   aij->getrowactive = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
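/*
   Usage sketch (illustrative): only locally owned rows may be queried, and each
   MatGetRow() must be paired with a MatRestoreRow() before the next row is
   requested, since a single work buffer (rowvalues/rowindices) is reused:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... inspect cols[0..ncols-1] and vals[0..ncols-1] (global column numbering) ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/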
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatNorm_MPIAIJ"
1570 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1571 {
1572   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1573   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1574   PetscErrorCode ierr;
1575   PetscInt       i,j,cstart = mat->cmap->rstart;
1576   PetscReal      sum = 0.0;
1577   MatScalar      *v;
1578 
1579   PetscFunctionBegin;
1580   if (aij->size == 1) {
1581     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1582   } else {
1583     if (type == NORM_FROBENIUS) {
1584       v = amat->a;
1585       for (i=0; i<amat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       v = bmat->a;
1593       for (i=0; i<bmat->nz; i++) {
1594 #if defined(PETSC_USE_COMPLEX)
1595         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1596 #else
1597         sum += (*v)*(*v); v++;
1598 #endif
1599       }
1600       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       *norm = sqrt(*norm);
1602     } else if (type == NORM_1) { /* max column norm */
1603       PetscReal *tmp,*tmp2;
1604       PetscInt  *jj,*garray = aij->garray;
1605       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1606       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1607       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1608       *norm = 0.0;
1609       v = amat->a; jj = amat->j;
1610       for (j=0; j<amat->nz; j++) {
1611         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1612       }
1613       v = bmat->a; jj = bmat->j;
1614       for (j=0; j<bmat->nz; j++) {
1615         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1616       }
1617       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1618       for (j=0; j<mat->cmap->N; j++) {
1619         if (tmp2[j] > *norm) *norm = tmp2[j];
1620       }
1621       ierr = PetscFree(tmp);CHKERRQ(ierr);
1622       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1623     } else if (type == NORM_INFINITY) { /* max row norm */
1624       PetscReal ntemp = 0.0;
1625       for (j=0; j<aij->A->rmap->n; j++) {
1626         v = amat->a + amat->i[j];
1627         sum = 0.0;
1628         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1629           sum += PetscAbsScalar(*v); v++;
1630         }
1631         v = bmat->a + bmat->i[j];
1632         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1633           sum += PetscAbsScalar(*v); v++;
1634         }
1635         if (sum > ntemp) ntemp = sum;
1636       }
1637       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1638     } else {
1639       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1640     }
1641   }
1642   PetscFunctionReturn(0);
1643 }
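/*
   Usage sketch (illustrative): the supported norms are NORM_1 (max column sum),
   NORM_FROBENIUS and NORM_INFINITY (max row sum); NORM_2 is rejected above.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);

   Note that the NORM_1 path allocates two work arrays of length equal to the
   global number of columns on every process, so it is memory-intensive for very
   wide matrices.
*/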
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatTranspose_MPIAIJ"
1647 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1648 {
1649   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1650   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1651   PetscErrorCode ierr;
1652   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1653   PetscInt       cstart=A->cmap->rstart,ncol;
1654   Mat            B;
1655   MatScalar      *array;
1656 
1657   PetscFunctionBegin;
1658   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1659 
1660   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1661   ai = Aloc->i; aj = Aloc->j;
1662   bi = Bloc->i; bj = Bloc->j;
1663   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1664     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1665     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1666     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1667     for (i=0; i<ai[ma]; i++){
1668       d_nnz[aj[i]] ++;
1669       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1670     }
1671 
1672     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1673     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1674     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1675     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1676     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1677   } else {
1678     B = *matout;
1679   }
1680 
1681   /* copy over the A part */
1682   array = Aloc->a;
1683   row = A->rmap->rstart;
1684   for (i=0; i<ma; i++) {
1685     ncol = ai[i+1]-ai[i];
1686     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1687     row++; array += ncol; aj += ncol;
1688   }
1689   aj = Aloc->j;
1690   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1691 
1692   /* copy over the B part */
1693   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1694   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1695   array = Bloc->a;
1696   row = A->rmap->rstart;
1697   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1698   cols_tmp = cols;
1699   for (i=0; i<mb; i++) {
1700     ncol = bi[i+1]-bi[i];
1701     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1702     row++; array += ncol; cols_tmp += ncol;
1703   }
1704   ierr = PetscFree(cols);CHKERRQ(ierr);
1705 
1706   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1708   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1709     *matout = B;
1710   } else {
1711     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1712   }
1713   PetscFunctionReturn(0);
1714 }
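/*
   Usage sketch (illustrative): both out-of-place and in-place transposes are
   supported; the in-place form requires a square matrix, as checked above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    creates a new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);       in place, square matrices only
     ierr = MatDestroy(At);CHKERRQ(ierr);

   Because the off-diagonal preallocation is only approximated by the diagonal
   counts (see the comment above), assembly of the transpose may still trigger
   additional mallocs for matrices with heavy off-process coupling.
*/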
1715 
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1718 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1719 {
1720   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1721   Mat            a = aij->A,b = aij->B;
1722   PetscErrorCode ierr;
1723   PetscInt       s1,s2,s3;
1724 
1725   PetscFunctionBegin;
1726   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1727   if (rr) {
1728     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1729     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1730     /* Overlap communication with computation. */
1731     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732   }
1733   if (ll) {
1734     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1735     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1736     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1737   }
1738   /* scale  the diagonal block */
1739   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1740 
1741   if (rr) {
1742     /* Do a scatter end and then right scale the off-diagonal block */
1743     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1744     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1745   }
1746 
1747   PetscFunctionReturn(0);
1748 }
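/*
   Usage sketch (illustrative, assuming A is square with no zero diagonal entries):
   left scaling touches both local blocks directly, while right scaling first
   scatters the scaling vector to the ghost entries so the off-diagonal block can
   be column-scaled.  A simple row equilibration by the diagonal:

     Vec d;
     ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecReciprocal(d);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,d,PETSC_NULL);CHKERRQ(ierr);    rows scaled by 1/diagonal
     ierr = VecDestroy(d);CHKERRQ(ierr);
*/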
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1752 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1753 {
1754   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1759   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1760   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1761   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1766 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1767 {
1768   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatEqual_MPIAIJ"
1778 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1779 {
1780   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1781   Mat            a,b,c,d;
1782   PetscBool      flg;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   a = matA->A; b = matA->B;
1787   c = matB->A; d = matB->B;
1788 
1789   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1790   if (flg) {
1791     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1792   }
1793   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatCopy_MPIAIJ"
1799 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1800 {
1801   PetscErrorCode ierr;
1802   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1803   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1804 
1805   PetscFunctionBegin;
1806   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1807   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1808     /* because of the column compression in the off-processor part of the matrix a->B,
1809        the number of columns in a->B and b->B may be different, hence we cannot call
1810        the MatCopy() directly on the two parts. If need be, we can provide a more
1811        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1812        then copying the submatrices */
1813     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1814   } else {
1815     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1816     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1823 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1824 {
1825   PetscErrorCode ierr;
1826 
1827   PetscFunctionBegin;
1828   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatAXPY_MPIAIJ"
1834 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       i;
1838   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1839   PetscBLASInt   bnz,one=1;
1840   Mat_SeqAIJ     *x,*y;
1841 
1842   PetscFunctionBegin;
1843   if (str == SAME_NONZERO_PATTERN) {
1844     PetscScalar alpha = a;
1845     x = (Mat_SeqAIJ *)xx->A->data;
1846     y = (Mat_SeqAIJ *)yy->A->data;
1847     bnz = PetscBLASIntCast(x->nz);
1848     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     bnz = PetscBLASIntCast(x->nz);
1852     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1853   } else if (str == SUBSET_NONZERO_PATTERN) {
1854     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1855 
1856     x = (Mat_SeqAIJ *)xx->B->data;
1857     y = (Mat_SeqAIJ *)yy->B->data;
1858     if (y->xtoy && y->XtoY != xx->B) {
1859       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1860       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1861     }
1862     if (!y->xtoy) { /* get xtoy */
1863       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1864       y->XtoY = xx->B;
1865       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1866     }
1867     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1868   } else {
1869     Mat B;
1870     PetscInt *nnz_d,*nnz_o;
1871     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
1872     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
1873     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
1874     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1875     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1876     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
1877     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1878     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
1879     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
1880     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1881     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1882     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1883     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1884   }
1885   PetscFunctionReturn(0);
1886 }
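/*
   Usage sketch (illustrative): Y <- a*X + Y, where the MatStructure flag selects
   the path above:

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       straight BLAS axpy on both blocks
     ierr = MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);     X's pattern contained in Y's
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  Y rebuilt with a merged pattern

   SAME_NONZERO_PATTERN is by far the cheapest; the DIFFERENT_NONZERO_PATTERN case
   replaces Y's internals with a freshly preallocated matrix.
*/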
1887 
1888 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatConjugate_MPIAIJ"
1892 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1893 {
1894 #if defined(PETSC_USE_COMPLEX)
1895   PetscErrorCode ierr;
1896   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1897 
1898   PetscFunctionBegin;
1899   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1900   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1901 #else
1902   PetscFunctionBegin;
1903 #endif
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatRealPart_MPIAIJ"
1909 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1910 {
1911   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1916   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1922 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1923 {
1924   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1929   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #ifdef PETSC_HAVE_PBGL
1934 
1935 #include <boost/parallel/mpi/bsp_process_group.hpp>
1936 #include <boost/graph/distributed/ilu_default_graph.hpp>
1937 #include <boost/graph/distributed/ilu_0_block.hpp>
1938 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1939 #include <boost/graph/distributed/petsc/interface.hpp>
1940 #include <boost/multi_array.hpp>
1941 #include <boost/parallel/distributed_property_map.hpp>
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1945 /*
1946   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1947 */
1948 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1949 {
1950   namespace petsc = boost::distributed::petsc;
1951 
1952   namespace graph_dist = boost::graph::distributed;
1953   using boost::graph::distributed::ilu_default::process_group_type;
1954   using boost::graph::ilu_permuted;
1955 
1956   PetscBool       row_identity, col_identity;
1957   PetscContainer  c;
1958   PetscInt        m, n, M, N;
1959   PetscErrorCode  ierr;
1960 
1961   PetscFunctionBegin;
1962   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1963   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1964   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1965   if (!row_identity || !col_identity) {
1966     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1967   }
1968 
1969   process_group_type pg;
1970   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1971   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1972   lgraph_type&   level_graph = *lgraph_p;
1973   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1974 
1975   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1976   ilu_permuted(level_graph);
1977 
1978   /* put together the new matrix */
1979   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1980   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1981   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1982   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1983   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1984   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1985   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1986 
1987   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1988   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1989   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1995 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1996 {
1997   PetscFunctionBegin;
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatSolve_MPIAIJ"
2003 /*
2004   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2005 */
2006 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2007 {
2008   namespace graph_dist = boost::graph::distributed;
2009 
2010   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2011   lgraph_type*   lgraph_p;
2012   PetscContainer c;
2013   PetscErrorCode ierr;
2014 
2015   PetscFunctionBegin;
2016   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2017   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2018   ierr = VecCopy(b, x);CHKERRQ(ierr);
2019 
2020   PetscScalar* array_x;
2021   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2022   PetscInt sx;
2023   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2024 
2025   PetscScalar* array_b;
2026   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2027   PetscInt sb;
2028   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2029 
2030   lgraph_type&   level_graph = *lgraph_p;
2031   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2032 
2033   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2034   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2035                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2036 
2037   typedef boost::iterator_property_map<array_ref_type::iterator,
2038                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2039   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2040                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2041 
2042   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2043 
2044   PetscFunctionReturn(0);
2045 }
2046 #endif
2047 
2048 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2049   PetscInt       nzlocal,nsends,nrecvs;
2050   PetscMPIInt    *send_rank,*recv_rank;
2051   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2052   PetscScalar    *sbuf_a,**rbuf_a;
2053   PetscErrorCode (*MatDestroy)(Mat);
2054 } Mat_Redundant;
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2058 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2059 {
2060   PetscErrorCode       ierr;
2061   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2062   PetscInt             i;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2066   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2067   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2068   for (i=0; i<redund->nrecvs; i++){
2069     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2070     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2071   }
2072   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2073   ierr = PetscFree(redund);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatDestroy_MatRedundant"
2079 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2080 {
2081   PetscErrorCode  ierr;
2082   PetscContainer  container;
2083   Mat_Redundant   *redund=PETSC_NULL;
2084 
2085   PetscFunctionBegin;
2086   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2087   if (container) {
2088     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2089   } else {
2090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2091   }
2092   A->ops->destroy = redund->MatDestroy;
2093   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2094   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2095   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
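/*
   Illustrative sketch of the container pattern used here: the Mat_Redundant state
   is attached to the redundant matrix with PetscObjectCompose() (done below in
   MatGetRedundantMatrix_MPIAIJ()) and is recovered the same way it is done above:

     PetscContainer container;
     Mat_Redundant  *redund;
     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
     if (container) {
       ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
     }

   The original destroy routine is stashed in redund->MatDestroy so that
   MatDestroy_MatRedundant() can restore and then invoke it.
*/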
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2101 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2102 {
2103   PetscMPIInt    rank,size;
2104   MPI_Comm       comm=((PetscObject)mat)->comm;
2105   PetscErrorCode ierr;
2106   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2107   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2108   PetscInt       *rowrange=mat->rmap->range;
2109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2110   Mat            A=aij->A,B=aij->B,C=*matredundant;
2111   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2112   PetscScalar    *sbuf_a;
2113   PetscInt       nzlocal=a->nz+b->nz;
2114   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2115   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2116   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2117   MatScalar      *aworkA,*aworkB;
2118   PetscScalar    *vals;
2119   PetscMPIInt    tag1,tag2,tag3,imdex;
2120   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2121                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2122   MPI_Status     recv_status,*send_status;
2123   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2124   PetscInt       **rbuf_j=PETSC_NULL;
2125   PetscScalar    **rbuf_a=PETSC_NULL;
2126   Mat_Redundant  *redund=PETSC_NULL;
2127   PetscContainer container;
2128 
2129   PetscFunctionBegin;
2130   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2131   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2132 
2133   if (reuse == MAT_REUSE_MATRIX) {
2134     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2135     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2136     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2137     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2138     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2139     if (container) {
2140       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2141     } else {
2142       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2143     }
2144     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2145 
2146     nsends    = redund->nsends;
2147     nrecvs    = redund->nrecvs;
2148     send_rank = redund->send_rank;
2149     recv_rank = redund->recv_rank;
2150     sbuf_nz   = redund->sbuf_nz;
2151     rbuf_nz   = redund->rbuf_nz;
2152     sbuf_j    = redund->sbuf_j;
2153     sbuf_a    = redund->sbuf_a;
2154     rbuf_j    = redund->rbuf_j;
2155     rbuf_a    = redund->rbuf_a;
2156   }
2157 
2158   if (reuse == MAT_INITIAL_MATRIX){
2159     PetscMPIInt  subrank,subsize;
2160     PetscInt     nleftover,np_subcomm;
2161     /* get the destination processors' id send_rank, nsends and nrecvs */
2162     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2163     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2164     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2165     np_subcomm = size/nsubcomm;
2166     nleftover  = size - nsubcomm*np_subcomm;
2167     nsends = 0; nrecvs = 0;
2168     for (i=0; i<size; i++){ /* i=rank*/
2169       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2170         send_rank[nsends] = i; nsends++;
2171         recv_rank[nrecvs++] = i;
2172       }
2173     }
2174     if (rank >= size - nleftover){/* this proc is a leftover processor */
2175       i = size-nleftover-1;
2176       j = 0;
2177       while (j < nsubcomm - nleftover){
2178         send_rank[nsends++] = i;
2179         i--; j++;
2180       }
2181     }
2182 
2183     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2184       for (i=0; i<nleftover; i++){
2185         recv_rank[nrecvs++] = size-nleftover+i;
2186       }
2187     }
2188 
2189     /* allocate sbuf_j, sbuf_a */
2190     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2191     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2192     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2193   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2194 
2195   /* copy mat's local entries into the buffers */
2196   if (reuse == MAT_INITIAL_MATRIX){
2197     rownz_max = 0;
2198     rptr = sbuf_j;
2199     cols = sbuf_j + rend-rstart + 1;
2200     vals = sbuf_a;
2201     rptr[0] = 0;
2202     for (i=0; i<rend-rstart; i++){
2203       row = i + rstart;
2204       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2205       ncols  = nzA + nzB;
2206       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2207       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2208       /* load the column indices for this row into cols */
2209       lwrite = 0;
2210       for (l=0; l<nzB; l++) {
2211         if ((ctmp = bmap[cworkB[l]]) < cstart){
2212           vals[lwrite]   = aworkB[l];
2213           cols[lwrite++] = ctmp;
2214         }
2215       }
2216       for (l=0; l<nzA; l++){
2217         vals[lwrite]   = aworkA[l];
2218         cols[lwrite++] = cstart + cworkA[l];
2219       }
2220       for (l=0; l<nzB; l++) {
2221         if ((ctmp = bmap[cworkB[l]]) >= cend){
2222           vals[lwrite]   = aworkB[l];
2223           cols[lwrite++] = ctmp;
2224         }
2225       }
2226       vals += ncols;
2227       cols += ncols;
2228       rptr[i+1] = rptr[i] + ncols;
2229       if (rownz_max < ncols) rownz_max = ncols;
2230     }
2231     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2232   } else { /* only copy matrix values into sbuf_a */
2233     rptr = sbuf_j;
2234     vals = sbuf_a;
2235     rptr[0] = 0;
2236     for (i=0; i<rend-rstart; i++){
2237       row = i + rstart;
2238       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2239       ncols  = nzA + nzB;
2240       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2241       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2242       lwrite = 0;
2243       for (l=0; l<nzB; l++) {
2244         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2245       }
2246       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2247       for (l=0; l<nzB; l++) {
2248         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2249       }
2250       vals += ncols;
2251       rptr[i+1] = rptr[i] + ncols;
2252     }
2253   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else (MAT_REUSE_MATRIX) */
2254 
2255   /* send nzlocal to others, and recv other's nzlocal */
2256   /*--------------------------------------------------*/
2257   if (reuse == MAT_INITIAL_MATRIX){
2258     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2259     s_waits2 = s_waits3 + nsends;
2260     s_waits1 = s_waits2 + nsends;
2261     r_waits1 = s_waits1 + nsends;
2262     r_waits2 = r_waits1 + nrecvs;
2263     r_waits3 = r_waits2 + nrecvs;
2264   } else {
2265     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2266     r_waits3 = s_waits3 + nsends;
2267   }
2268 
2269   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2270   if (reuse == MAT_INITIAL_MATRIX){
2271     /* get new tags to keep the communication clean */
2272     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2273     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2274     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2275 
2276     /* post receives of other's nzlocal */
2277     for (i=0; i<nrecvs; i++){
2278       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2279     }
2280     /* send nzlocal to others */
2281     for (i=0; i<nsends; i++){
2282       sbuf_nz[i] = nzlocal;
2283       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2284     }
2285     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2286     count = nrecvs;
2287     while (count) {
2288       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2289       recv_rank[imdex] = recv_status.MPI_SOURCE;
2290       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2291       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2292 
2293       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2294       rbuf_nz[imdex] += i + 2;
2295       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2296       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2297       count--;
2298     }
2299     /* wait on sends of nzlocal */
2300     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2301     /* send mat->i,j to others, and recv from other's */
2302     /*------------------------------------------------*/
2303     for (i=0; i<nsends; i++){
2304       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2305       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2306     }
2307     /* wait on receives of mat->i,j */
2308     /*------------------------------*/
2309     count = nrecvs;
2310     while (count) {
2311       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2312       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313       count--;
2314     }
2315     /* wait on sends of mat->i,j */
2316     /*---------------------------*/
2317     if (nsends) {
2318       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2319     }
2320   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2321 
2322   /* post receives, send and receive mat->a */
2323   /*----------------------------------------*/
2324   for (imdex=0; imdex<nrecvs; imdex++) {
2325     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2326   }
2327   for (i=0; i<nsends; i++){
2328     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2329   }
2330   count = nrecvs;
2331   while (count) {
2332     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2333     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2334     count--;
2335   }
2336   if (nsends) {
2337     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2338   }
2339 
2340   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2341 
2342   /* create redundant matrix */
2343   /*-------------------------*/
2344   if (reuse == MAT_INITIAL_MATRIX){
2345     /* compute rownz_max for preallocation */
2346     for (imdex=0; imdex<nrecvs; imdex++){
2347       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2348       rptr = rbuf_j[imdex];
2349       for (i=0; i<j; i++){
2350         ncols = rptr[i+1] - rptr[i];
2351         if (rownz_max < ncols) rownz_max = ncols;
2352       }
2353     }
2354 
2355     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2356     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2357     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2358     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2359     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2360   } else {
2361     C = *matredundant;
2362   }
2363 
2364   /* insert local matrix entries */
2365   rptr = sbuf_j;
2366   cols = sbuf_j + rend-rstart + 1;
2367   vals = sbuf_a;
2368   for (i=0; i<rend-rstart; i++){
2369     row   = i + rstart;
2370     ncols = rptr[i+1] - rptr[i];
2371     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2372     vals += ncols;
2373     cols += ncols;
2374   }
2375   /* insert received matrix entries */
2376   for (imdex=0; imdex<nrecvs; imdex++){
2377     rstart = rowrange[recv_rank[imdex]];
2378     rend   = rowrange[recv_rank[imdex]+1];
2379     rptr = rbuf_j[imdex];
2380     cols = rbuf_j[imdex] + rend-rstart + 1;
2381     vals = rbuf_a[imdex];
2382     for (i=0; i<rend-rstart; i++){
2383       row   = i + rstart;
2384       ncols = rptr[i+1] - rptr[i];
2385       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2386       vals += ncols;
2387       cols += ncols;
2388     }
2389   }
2390   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2393   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2394   if (reuse == MAT_INITIAL_MATRIX){
2395     PetscContainer container;
2396     *matredundant = C;
2397     /* create a supporting struct and attach it to C for reuse */
2398     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2399     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2400     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2401     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2402     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2403 
2404     redund->nzlocal = nzlocal;
2405     redund->nsends  = nsends;
2406     redund->nrecvs  = nrecvs;
2407     redund->send_rank = send_rank;
2408     redund->recv_rank = recv_rank;
2409     redund->sbuf_nz = sbuf_nz;
2410     redund->rbuf_nz = rbuf_nz;
2411     redund->sbuf_j  = sbuf_j;
2412     redund->sbuf_a  = sbuf_a;
2413     redund->rbuf_j  = rbuf_j;
2414     redund->rbuf_a  = rbuf_a;
2415 
2416     redund->MatDestroy = C->ops->destroy;
2417     C->ops->destroy    = MatDestroy_MatRedundant;
2418   }
2419   PetscFunctionReturn(0);
2420 }
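/*
   Usage sketch (illustrative; setting up the subcommunicators is the caller's
   responsibility, e.g. via PetscSubcomm or MPI_Comm_split): each subcommunicator
   ends up with a complete copy of the parallel matrix, which can then be solved
   redundantly (this is how PCREDUNDANT is intended to use it).

     Mat Cred;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are re-communicated; the message
   layout saved in the attached Mat_Redundant structure is reused.
*/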
2421 
2422 #undef __FUNCT__
2423 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2424 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2425 {
2426   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2427   PetscErrorCode ierr;
2428   PetscInt       i,*idxb = 0;
2429   PetscScalar    *va,*vb;
2430   Vec            vtmp;
2431 
2432   PetscFunctionBegin;
2433   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2434   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2435   if (idx) {
2436     for (i=0; i<A->rmap->n; i++) {
2437       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2438     }
2439   }
2440 
2441   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2442   if (idx) {
2443     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2444   }
2445   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2446   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2447 
2448   for (i=0; i<A->rmap->n; i++){
2449     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2450       va[i] = vb[i];
2451       if (idx) idx[i] = a->garray[idxb[i]];
2452     }
2453   }
2454 
2455   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2456   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2457   if (idxb) {
2458     ierr = PetscFree(idxb);CHKERRQ(ierr);
2459   }
2460   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
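/*
   Usage sketch (illustrative): v receives, for each locally owned row, the entry of
   largest magnitude over both the diagonal and off-diagonal blocks; when idx[] is
   non-null it receives the corresponding global column index.

     Vec vmax;
     ierr = MatGetVecs(A,PETSC_NULL,&vmax);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,vmax,PETSC_NULL);CHKERRQ(ierr);
     ...
     ierr = VecDestroy(vmax);CHKERRQ(ierr);
*/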
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2466 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2467 {
2468   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2469   PetscErrorCode ierr;
2470   PetscInt       i,*idxb = 0;
2471   PetscScalar    *va,*vb;
2472   Vec            vtmp;
2473 
2474   PetscFunctionBegin;
2475   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2476   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2477   if (idx) {
2478     for (i=0; i<A->rmap->n; i++) {
2479       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2480     }
2481   }
2482 
2483   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2484   if (idx) {
2485     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2486   }
2487   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2488   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2489 
2490   for (i=0; i<A->rmap->n; i++){
2491     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2492       va[i] = vb[i];
2493       if (idx) idx[i] = a->garray[idxb[i]];
2494     }
2495   }
2496 
2497   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2498   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2499   if (idxb) {
2500     ierr = PetscFree(idxb);CHKERRQ(ierr);
2501   }
2502   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2508 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2509 {
2510   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2511   PetscInt       n      = A->rmap->n;
2512   PetscInt       cstart = A->cmap->rstart;
2513   PetscInt      *cmap   = mat->garray;
2514   PetscInt      *diagIdx, *offdiagIdx;
2515   Vec            diagV, offdiagV;
2516   PetscScalar   *a, *diagA, *offdiagA;
2517   PetscInt       r;
2518   PetscErrorCode ierr;
2519 
2520   PetscFunctionBegin;
2521   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2522   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2523   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2524   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2525   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2526   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2527   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2528   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2529   for(r = 0; r < n; ++r) {
2530     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2531       a[r]   = diagA[r];
2532       idx[r] = cstart + diagIdx[r];
2533     } else {
2534       a[r]   = offdiagA[r];
2535       idx[r] = cmap[offdiagIdx[r]];
2536     }
2537   }
2538   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2539   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2540   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2541   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2542   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2543   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2549 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2550 {
2551   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2552   PetscInt       n      = A->rmap->n;
2553   PetscInt       cstart = A->cmap->rstart;
2554   PetscInt      *cmap   = mat->garray;
2555   PetscInt      *diagIdx, *offdiagIdx;
2556   Vec            diagV, offdiagV;
2557   PetscScalar   *a, *diagA, *offdiagA;
2558   PetscInt       r;
2559   PetscErrorCode ierr;
2560 
2561   PetscFunctionBegin;
2562   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2563   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2564   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2565   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2566   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2567   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2568   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2569   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2570   for(r = 0; r < n; ++r) {
2571     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2572       a[r]   = diagA[r];
2573       idx[r] = cstart + diagIdx[r];
2574     } else {
2575       a[r]   = offdiagA[r];
2576       idx[r] = cmap[offdiagIdx[r]];
2577     }
2578   }
2579   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2580   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2581   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2582   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2583   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2584   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2590 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2591 {
2592   PetscErrorCode ierr;
2593   Mat            *dummy;
2594 
2595   PetscFunctionBegin;
2596   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2597   *newmat = *dummy;
2598   ierr = PetscFree(dummy);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2603 /* -------------------------------------------------------------------*/
2604 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2605        MatGetRow_MPIAIJ,
2606        MatRestoreRow_MPIAIJ,
2607        MatMult_MPIAIJ,
2608 /* 4*/ MatMultAdd_MPIAIJ,
2609        MatMultTranspose_MPIAIJ,
2610        MatMultTransposeAdd_MPIAIJ,
2611 #ifdef PETSC_HAVE_PBGL
2612        MatSolve_MPIAIJ,
2613 #else
2614        0,
2615 #endif
2616        0,
2617        0,
2618 /*10*/ 0,
2619        0,
2620        0,
2621        MatSOR_MPIAIJ,
2622        MatTranspose_MPIAIJ,
2623 /*15*/ MatGetInfo_MPIAIJ,
2624        MatEqual_MPIAIJ,
2625        MatGetDiagonal_MPIAIJ,
2626        MatDiagonalScale_MPIAIJ,
2627        MatNorm_MPIAIJ,
2628 /*20*/ MatAssemblyBegin_MPIAIJ,
2629        MatAssemblyEnd_MPIAIJ,
2630        MatSetOption_MPIAIJ,
2631        MatZeroEntries_MPIAIJ,
2632 /*24*/ MatZeroRows_MPIAIJ,
2633        0,
2634 #ifdef PETSC_HAVE_PBGL
2635        0,
2636 #else
2637        0,
2638 #endif
2639        0,
2640        0,
2641 /*29*/ MatSetUpPreallocation_MPIAIJ,
2642 #ifdef PETSC_HAVE_PBGL
2643        0,
2644 #else
2645        0,
2646 #endif
2647        0,
2648        0,
2649        0,
2650 /*34*/ MatDuplicate_MPIAIJ,
2651        0,
2652        0,
2653        0,
2654        0,
2655 /*39*/ MatAXPY_MPIAIJ,
2656        MatGetSubMatrices_MPIAIJ,
2657        MatIncreaseOverlap_MPIAIJ,
2658        MatGetValues_MPIAIJ,
2659        MatCopy_MPIAIJ,
2660 /*44*/ MatGetRowMax_MPIAIJ,
2661        MatScale_MPIAIJ,
2662        0,
2663        0,
2664        0,
2665 /*49*/ MatSetBlockSize_MPIAIJ,
2666        0,
2667        0,
2668        0,
2669        0,
2670 /*54*/ MatFDColoringCreate_MPIAIJ,
2671        0,
2672        MatSetUnfactored_MPIAIJ,
2673        MatPermute_MPIAIJ,
2674        0,
2675 /*59*/ MatGetSubMatrix_MPIAIJ,
2676        MatDestroy_MPIAIJ,
2677        MatView_MPIAIJ,
2678        0,
2679        0,
2680 /*64*/ 0,
2681        0,
2682        0,
2683        0,
2684        0,
2685 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2686        MatGetRowMinAbs_MPIAIJ,
2687        0,
2688        MatSetColoring_MPIAIJ,
2689 #if defined(PETSC_HAVE_ADIC)
2690        MatSetValuesAdic_MPIAIJ,
2691 #else
2692        0,
2693 #endif
2694        MatSetValuesAdifor_MPIAIJ,
2695 /*75*/ MatFDColoringApply_AIJ,
2696        0,
2697        0,
2698        0,
2699        0,
2700 /*80*/ 0,
2701        0,
2702        0,
2703 /*83*/ MatLoad_MPIAIJ,
2704        0,
2705        0,
2706        0,
2707        0,
2708        0,
2709 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2710        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2711        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2712        MatPtAP_Basic,
2713        MatPtAPSymbolic_MPIAIJ,
2714 /*94*/ MatPtAPNumeric_MPIAIJ,
2715        0,
2716        0,
2717        0,
2718        0,
2719 /*99*/ 0,
2720        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2721        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2722        MatConjugate_MPIAIJ,
2723        0,
2724 /*104*/MatSetValuesRow_MPIAIJ,
2725        MatRealPart_MPIAIJ,
2726        MatImaginaryPart_MPIAIJ,
2727        0,
2728        0,
2729 /*109*/0,
2730        MatGetRedundantMatrix_MPIAIJ,
2731        MatGetRowMin_MPIAIJ,
2732        0,
2733        0,
2734 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2735        0,
2736        0,
2737        0,
2738        0,
2739 /*119*/0,
2740        0,
2741        0,
2742        0,
2743        MatGetMultiProcBlock_MPIAIJ
2744 };
2745 
2746 /* ----------------------------------------------------------------------------------------*/
2747 
2748 EXTERN_C_BEGIN
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2751 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2752 {
2753   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2754   PetscErrorCode ierr;
2755 
2756   PetscFunctionBegin;
2757   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2758   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2759   PetscFunctionReturn(0);
2760 }
2761 EXTERN_C_END
2762 
2763 EXTERN_C_BEGIN
2764 #undef __FUNCT__
2765 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2766 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2767 {
2768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2769   PetscErrorCode ierr;
2770 
2771   PetscFunctionBegin;
2772   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2773   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2774   PetscFunctionReturn(0);
2775 }
2776 EXTERN_C_END
2777 
2778 EXTERN_C_BEGIN
2779 #undef __FUNCT__
2780 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2781 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2782 {
2783   Mat_MPIAIJ     *b;
2784   PetscErrorCode ierr;
2785   PetscInt       i;
2786 
2787   PetscFunctionBegin;
2788   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2789   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2790   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2791   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2792 
2793   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2794   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2795   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2796   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2797   if (d_nnz) {
2798     for (i=0; i<B->rmap->n; i++) {
2799       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2800     }
2801   }
2802   if (o_nnz) {
2803     for (i=0; i<B->rmap->n; i++) {
2804       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2805     }
2806   }
2807   b = (Mat_MPIAIJ*)B->data;
2808 
2809   if (!B->preallocated) {
2810     /* Explicitly create 2 MATSEQAIJ matrices. */
2811     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2812     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2813     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2814     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2815     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2816     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2817     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2818     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2819   }
2820 
2821   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2822   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2823   B->preallocated = PETSC_TRUE;
2824   PetscFunctionReturn(0);
2825 }
2826 EXTERN_C_END
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2830 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2831 {
2832   Mat            mat;
2833   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2834   PetscErrorCode ierr;
2835 
2836   PetscFunctionBegin;
2837   *newmat       = 0;
2838   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2839   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2840   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2841   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2842   a    = (Mat_MPIAIJ*)mat->data;
2843 
2844   mat->factortype    = matin->factortype;
2845   mat->rmap->bs      = matin->rmap->bs;
2846   mat->assembled    = PETSC_TRUE;
2847   mat->insertmode   = NOT_SET_VALUES;
2848   mat->preallocated = PETSC_TRUE;
2849 
2850   a->size           = oldmat->size;
2851   a->rank           = oldmat->rank;
2852   a->donotstash     = oldmat->donotstash;
2853   a->roworiented    = oldmat->roworiented;
2854   a->rowindices     = 0;
2855   a->rowvalues      = 0;
2856   a->getrowactive   = PETSC_FALSE;
2857 
2858   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2859   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2860 
2861   if (oldmat->colmap) {
2862 #if defined (PETSC_USE_CTABLE)
2863     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2864 #else
2865     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2866     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2867     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2868 #endif
2869   } else a->colmap = 0;
2870   if (oldmat->garray) {
2871     PetscInt len;
2872     len  = oldmat->B->cmap->n;
2873     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2874     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2875     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2876   } else a->garray = 0;
2877 
2878   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2879   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2880   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2881   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2882   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2883   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2884   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2885   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2886   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2887   *newmat = mat;
2888   PetscFunctionReturn(0);
2889 }
2890 
2891 /*
2892     Allows sending/receiving messages larger than 2 gigabytes in a single call
2893 */
2894 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2895 {
2896   int             ierr;
2897   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2898   PetscInt        i,numchunks;
2899   PetscMPIInt     icnt;
2900 
2901   numchunks = cnt/CHUNKSIZE + 1;
2902   for (i=0; i<numchunks; i++) {
2903     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2904     ierr = MPI_Send(mess,icnt,type,to,tag,comm);
2905     if (type == MPIU_INT) {
2906       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2907     } else if (type == MPIU_SCALAR) {
2908       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2909     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2910   }
2911   return 0;
2912 }
2913 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2914 {
2915   int             ierr;
2916   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2917   MPI_Status      status;
2918   PetscInt        i,numchunks;
2919   PetscMPIInt     icnt;
2920 
2921   numchunks = cnt/CHUNKSIZE + 1;
2922   for (i=0; i<numchunks; i++) {
2923     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2924     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);
2925     if (type == MPIU_INT) {
2926       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2927     } else if (type == MPIU_SCALAR) {
2928       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2929     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2930   }
2931   return 0;
2932 }
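/*
   A sketch of how these two helpers are meant to be paired (the ranks, tag, and count shown
   here are illustrative only): sender and receiver must pass the same total count so that both
   sides split the transfer into the same sequence of chunks.

     if (!rank) {
       ierr = MPILong_Send(bigarray,totalcount,MPIU_SCALAR,1,tag,comm);CHKERRQ(ierr);
     } else if (rank == 1) {
       ierr = MPILong_Recv(bigarray,totalcount,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
     }
*/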
2933 
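/*
   MatLoad_MPIAIJ reads the matrix on process 0 from a PETSc binary viewer and ships each
   process its rows using MPILong_Send()/MPILong_Recv(). A sketch of typical use through the
   generic MatLoad() interface (the file name is illustrative):

     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
*/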
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatLoad_MPIAIJ"
2936 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2937 {
2938   PetscScalar    *vals,*svals;
2939   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2940   PetscErrorCode ierr;
2941   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2942   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2943   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2944   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2945   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2946   int            fd;
2947 
2948   PetscFunctionBegin;
2949   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2950   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2951   if (!rank) {
2952     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2953     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2954     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2955   }
2956 
2957   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2958 
2959   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2960   M = header[1]; N = header[2];
2961   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2962   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2963   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2964 
2965   /* If global sizes are set, check if they are consistent with that given in the file */
2966   if (sizesset) {
2967     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2968   }
2969   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2970   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2971 
2972   /* determine ownership of all rows */
2973   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2974   else m = newMat->rmap->n; /* Set by user */
2975 
2976   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2977   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2978 
2979   /* First process needs enough room for process with most rows */
2980   if (!rank) {
2981     mmax       = rowners[1];
2982     for (i=2; i<=size; i++) {
2983       mmax = PetscMax(mmax,rowners[i]);
2984     }
2985   } else mmax = m;
2986 
2987   rowners[0] = 0;
2988   for (i=2; i<=size; i++) {
2989     rowners[i] += rowners[i-1];
2990   }
2991   rstart = rowners[rank];
2992   rend   = rowners[rank+1];
2993 
2994   /* distribute row lengths to all processors */
2995   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2996   if (!rank) {
2997     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2998     ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2999     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3000     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3001     for (j=0; j<m; j++) {
3002       procsnz[0] += ourlens[j];
3003     }
3004     for (i=1; i<size; i++) {
3005       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3006       /* calculate the number of nonzeros on each processor */
3007       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3008         procsnz[i] += rowlengths[j];
3009       }
3010       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3011     }
3012     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3013   } else {
3014     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3015   }
3016 
3017   if (!rank) {
3018     /* determine max buffer needed and allocate it */
3019     maxnz = 0;
3020     for (i=0; i<size; i++) {
3021       maxnz = PetscMax(maxnz,procsnz[i]);
3022     }
3023     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3024 
3025     /* read in my part of the matrix column indices  */
3026     nz   = procsnz[0];
3027     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3028     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3029 
3030     /* read in everyone else's part and ship it off */
3031     for (i=1; i<size; i++) {
3032       nz     = procsnz[i];
3033       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3034       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3035     }
3036     ierr = PetscFree(cols);CHKERRQ(ierr);
3037   } else {
3038     /* determine buffer space needed for message */
3039     nz = 0;
3040     for (i=0; i<m; i++) {
3041       nz += ourlens[i];
3042     }
3043     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3044 
3045     /* receive message of column indices*/
3046     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3047   }
3048 
3049   /* determine column ownership if matrix is not square */
3050   if (N != M) {
3051     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3052     else n = newMat->cmap->n;
3053     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3054     cstart = cend - n;
3055   } else {
3056     cstart = rstart;
3057     cend   = rend;
3058     n      = cend - cstart;
3059   }
3060 
3061   /* loop over local rows, determining number of off diagonal entries */
3062   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3063   jj = 0;
3064   for (i=0; i<m; i++) {
3065     for (j=0; j<ourlens[i]; j++) {
3066       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3067       jj++;
3068     }
3069   }
3070 
3071   for (i=0; i<m; i++) {
3072     ourlens[i] -= offlens[i];
3073   }
3074   if (!sizesset) {
3075     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3076   }
3077   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3078 
3079   for (i=0; i<m; i++) {
3080     ourlens[i] += offlens[i];
3081   }
3082 
3083   if (!rank) {
3084     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3085 
3086     /* read in my part of the matrix numerical values  */
3087     nz   = procsnz[0];
3088     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3089 
3090     /* insert into matrix */
3091     jj      = rstart;
3092     smycols = mycols;
3093     svals   = vals;
3094     for (i=0; i<m; i++) {
3095       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3096       smycols += ourlens[i];
3097       svals   += ourlens[i];
3098       jj++;
3099     }
3100 
3101     /* read in other processors and ship out */
3102     for (i=1; i<size; i++) {
3103       nz     = procsnz[i];
3104       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3105       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3106     }
3107     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3108   } else {
3109     /* receive numeric values */
3110     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3111 
3112     /* receive message of values*/
3113     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3114 
3115     /* insert into matrix */
3116     jj      = rstart;
3117     smycols = mycols;
3118     svals   = vals;
3119     for (i=0; i<m; i++) {
3120       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3121       smycols += ourlens[i];
3122       svals   += ourlens[i];
3123       jj++;
3124     }
3125   }
3126   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3127   ierr = PetscFree(vals);CHKERRQ(ierr);
3128   ierr = PetscFree(mycols);CHKERRQ(ierr);
3129   ierr = PetscFree(rowners);CHKERRQ(ierr);
3130 
3131   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3132   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3133   PetscFunctionReturn(0);
3134 }
3135 
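/*
   MatGetSubMatrix_MPIAIJ - Extracts a parallel submatrix; it is normally reached through the
   generic MatGetSubMatrix() interface. A sketch of such a call (isrow/iscol are parallel index
   sets giving the rows/columns each process wants to own in the result):

     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/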
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3138 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3139 {
3140   PetscErrorCode ierr;
3141   IS             iscol_local;
3142   PetscInt       csize;
3143 
3144   PetscFunctionBegin;
3145   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3146   if (call == MAT_REUSE_MATRIX) {
3147     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3148     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3149   } else {
3150     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3151   }
3152   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3153   if (call == MAT_INITIAL_MATRIX) {
3154     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3155     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3156   }
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3162 /*
3163     Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
3164   and then the end result by concatenating the local matrices.
3165   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3166 
3167   Note: This requires a sequential iscol with all indices.
3168 */
3169 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3170 {
3171   PetscErrorCode ierr;
3172   PetscMPIInt    rank,size;
3173   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3174   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3175   Mat            *local,M,Mreuse;
3176   MatScalar      *vwork,*aa;
3177   MPI_Comm       comm = ((PetscObject)mat)->comm;
3178   Mat_SeqAIJ     *aij;
3179 
3180 
3181   PetscFunctionBegin;
3182   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3184 
3185   if (call ==  MAT_REUSE_MATRIX) {
3186     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3187     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3188     local = &Mreuse;
3189     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3190   } else {
3191     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3192     Mreuse = *local;
3193     ierr   = PetscFree(local);CHKERRQ(ierr);
3194   }
3195 
3196   /*
3197       m - number of local rows
3198       n - number of columns (same on all processors)
3199       rstart - first row in new global matrix generated
3200   */
3201   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3202   if (call == MAT_INITIAL_MATRIX) {
3203     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3204     ii  = aij->i;
3205     jj  = aij->j;
3206 
3207     /*
3208         Determine the number of non-zeros in the diagonal and off-diagonal
3209         portions of the matrix in order to do correct preallocation
3210     */
3211 
3212     /* first get start and end of "diagonal" columns */
3213     if (csize == PETSC_DECIDE) {
3214       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3215       if (mglobal == n) { /* square matrix */
3216 	nlocal = m;
3217       } else {
3218         nlocal = n/size + ((n % size) > rank);
3219       }
3220     } else {
3221       nlocal = csize;
3222     }
3223     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3224     rstart = rend - nlocal;
3225     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3226 
3227     /* next, compute all the lengths */
3228     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3229     olens = dlens + m;
3230     for (i=0; i<m; i++) {
3231       jend = ii[i+1] - ii[i];
3232       olen = 0;
3233       dlen = 0;
3234       for (j=0; j<jend; j++) {
3235         if (*jj < rstart || *jj >= rend) olen++;
3236         else dlen++;
3237         jj++;
3238       }
3239       olens[i] = olen;
3240       dlens[i] = dlen;
3241     }
3242     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3243     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3244     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3245     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3246     ierr = PetscFree(dlens);CHKERRQ(ierr);
3247   } else {
3248     PetscInt ml,nl;
3249 
3250     M = *newmat;
3251     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3252     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3253     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3254     /*
3255          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3256        rather than the slower MatSetValues().
3257     */
3258     M->was_assembled = PETSC_TRUE;
3259     M->assembled     = PETSC_FALSE;
3260   }
3261   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3262   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3263   ii  = aij->i;
3264   jj  = aij->j;
3265   aa  = aij->a;
3266   for (i=0; i<m; i++) {
3267     row   = rstart + i;
3268     nz    = ii[i+1] - ii[i];
3269     cwork = jj;     jj += nz;
3270     vwork = aa;     aa += nz;
3271     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3272   }
3273 
3274   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3275   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3276   *newmat = M;
3277 
3278   /* save submatrix used in processor for next request */
3279   if (call ==  MAT_INITIAL_MATRIX) {
3280     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3281     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3282   }
3283 
3284   PetscFunctionReturn(0);
3285 }
3286 
3287 EXTERN_C_BEGIN
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3290 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3291 {
3292   PetscInt       m,cstart, cend,j,nnz,i,d;
3293   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3294   const PetscInt *JJ;
3295   PetscScalar    *values;
3296   PetscErrorCode ierr;
3297 
3298   PetscFunctionBegin;
3299   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3300 
3301   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3302   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3303   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3304   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3305   m      = B->rmap->n;
3306   cstart = B->cmap->rstart;
3307   cend   = B->cmap->rend;
3308   rstart = B->rmap->rstart;
3309 
3310   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3311 
3312 #if defined(PETSC_USE_DEBUG)
3313   for (i=0; i<m; i++) {
3314     nnz     = Ii[i+1]- Ii[i];
3315     JJ      = J + Ii[i];
3316     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3317     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3318     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3319   }
3320 #endif
3321 
3322   for (i=0; i<m; i++) {
3323     nnz     = Ii[i+1]- Ii[i];
3324     JJ      = J + Ii[i];
3325     nnz_max = PetscMax(nnz_max,nnz);
3326     d       = 0;
3327     for (j=0; j<nnz; j++) {
3328       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3329     }
3330     d_nnz[i] = d;
3331     o_nnz[i] = nnz - d;
3332   }
3333   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3334   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3335 
3336   if (v) values = (PetscScalar*)v;
3337   else {
3338     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3339     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3340   }
3341 
3342   for (i=0; i<m; i++) {
3343     ii   = i + rstart;
3344     nnz  = Ii[i+1]- Ii[i];
3345     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3346   }
3347   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3349 
3350   if (!v) {
3351     ierr = PetscFree(values);CHKERRQ(ierr);
3352   }
3353   PetscFunctionReturn(0);
3354 }
3355 EXTERN_C_END
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3359 /*@
3360    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3361    (the default parallel PETSc format).
3362 
3363    Collective on MPI_Comm
3364 
3365    Input Parameters:
3366 +  B - the matrix
3367 .  i - the indices into j for the start of each local row (starts with zero)
3368 .  j - the column indices for each local row (starts with zero)
3369 -  v - optional values in the matrix
3370 
3371    Level: developer
3372 
3373    Notes:
3374        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3375      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3376      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3377 
3378        The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3379 
3380        The format which is used for the sparse matrix input, is equivalent to a
3381     row-major ordering.. i.e for the following matrix, the input data expected is
3382     as shown:
3383 
3384         1 0 0
3385         2 0 3     P0
3386        -------
3387         4 5 6     P1
3388 
3389      Process0 [P0]: rows_owned=[0,1]
3390         i =  {0,1,3}  [size = nrow+1  = 2+1]
3391         j =  {0,0,2}  [size = nz = 3]
3392         v =  {1,2,3}  [size = nz = 3]
3393 
3394      Process1 [P1]: rows_owned=[2]
3395         i =  {0,3}    [size = nrow+1  = 1+1]
3396         j =  {0,1,2}  [size = nz = 3]
3397         v =  {4,5,6}  [size = nz = 3]
3398 
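     A sketch of the corresponding call on process 0 (B is assumed to already have been created
     as an MPIAIJ matrix with MatCreate()/MatSetSizes()/MatSetType(); the arrays are those listed
     above for P0):
.vb
        PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve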
3399 .keywords: matrix, aij, compressed row, sparse, parallel
3400 
3401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3402           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3403 @*/
3404 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3405 {
3406   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3407 
3408   PetscFunctionBegin;
3409   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3410   if (f) {
3411     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3412   }
3413   PetscFunctionReturn(0);
3414 }
3415 
3416 #undef __FUNCT__
3417 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3418 /*@C
3419    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3420    (the default parallel PETSc format).  For good matrix assembly performance
3421    the user should preallocate the matrix storage by setting the parameters
3422    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3423    performance can be increased by more than a factor of 50.
3424 
3425    Collective on MPI_Comm
3426 
3427    Input Parameters:
3428 +  A - the matrix
3429 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3430            (same value is used for all local rows)
3431 .  d_nnz - array containing the number of nonzeros in the various rows of the
3432            DIAGONAL portion of the local submatrix (possibly different for each row)
3433            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3434            The size of this array is equal to the number of local rows, i.e 'm'.
3435            You must leave room for the diagonal entry even if it is zero.
3436 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3437            submatrix (same value is used for all local rows).
3438 -  o_nnz - array containing the number of nonzeros in the various rows of the
3439            OFF-DIAGONAL portion of the local submatrix (possibly different for
3440            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3441            structure. The size of this array is equal to the number
3442            of local rows, i.e 'm'.
3443 
3444    If the *_nnz parameter is given then the *_nz parameter is ignored
3445 
3446    The AIJ format (also called the Yale sparse matrix format or
3447    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3448    storage.  The stored row and column indices begin with zero.
3449    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3450 
3451    The parallel matrix is partitioned such that the first m0 rows belong to
3452    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3453    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3454 
3455    The DIAGONAL portion of the local submatrix of a processor can be defined
3456    as the submatrix which is obtained by extracting the part corresponding to
3457    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3458    first row that belongs to the processor, r2 is the last row belonging to
3459    this processor, and c1-c2 is the range of indices of the local part of a
3460    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3461    common case of a square matrix, the row and column ranges are the same and
3462    the DIAGONAL part is also square. The remaining portion of the local
3463    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3464 
3465    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3466 
3467    You can call MatGetInfo() to get information on how effective the preallocation was;
3468    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3469    You can also run with the option -info and look for messages with the string
3470    malloc in them to see if additional memory allocation was needed.
3471 
3472    Example usage:
3473 
3474    Consider the following 8x8 matrix with 34 non-zero values, that is
3475    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3476    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3477    as follows:
3478 
3479 .vb
3480             1  2  0  |  0  3  0  |  0  4
3481     Proc0   0  5  6  |  7  0  0  |  8  0
3482             9  0 10  | 11  0  0  | 12  0
3483     -------------------------------------
3484            13  0 14  | 15 16 17  |  0  0
3485     Proc1   0 18  0  | 19 20 21  |  0  0
3486             0  0  0  | 22 23  0  | 24  0
3487     -------------------------------------
3488     Proc2  25 26 27  |  0  0 28  | 29  0
3489            30  0  0  | 31 32 33  |  0 34
3490 .ve
3491 
3492    This can be represented as a collection of submatrices as:
3493 
3494 .vb
3495       A B C
3496       D E F
3497       G H I
3498 .ve
3499 
3500    Where the submatrices A,B,C are owned by proc0, D,E,F are
3501    owned by proc1, G,H,I are owned by proc2.
3502 
3503    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3504    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3505    The 'M','N' parameters are 8,8, and have the same values on all procs.
3506 
3507    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3508    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3509    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3510    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3511    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3512    matrix, and [DF] as another SeqAIJ matrix.
3513 
3514    When d_nz, o_nz parameters are specified, d_nz storage elements are
3515    allocated for every row of the local diagonal submatrix, and o_nz
3516    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3517    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3518    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3519    In this case, the values of d_nz,o_nz are:
3520 .vb
3521      proc0 : d_nz = 2, o_nz = 2
3522      proc1 : d_nz = 3, o_nz = 2
3523      proc2 : d_nz = 1, o_nz = 4
3524 .ve
3525    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3526    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3527    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3528    34 values.
3529 
3530    When d_nnz, o_nnz parameters are specified, the storage is specified
3531    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3532    In the above case the values for d_nnz,o_nnz are:
3533 .vb
3534      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3535      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3536      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3537 .ve
3538    Here the space allocated is the sum of all the above values, i.e., 34, and
3539    hence pre-allocation is perfect.
3540 
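   For instance, with the d_nnz/o_nnz values above, proc0 would preallocate with a call like the
   following sketch (B is assumed to already be an MPIAIJ matrix with the local/global sizes shown):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve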
3541    Level: intermediate
3542 
3543 .keywords: matrix, aij, compressed row, sparse, parallel
3544 
3545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3546           MPIAIJ, MatGetInfo()
3547 @*/
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3549 {
3550   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3551 
3552   PetscFunctionBegin;
3553   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3554   if (f) {
3555     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3556   }
3557   PetscFunctionReturn(0);
3558 }
3559 
3560 #undef __FUNCT__
3561 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3562 /*@
3563     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3564         in standard CSR format.
3565 
3566    Collective on MPI_Comm
3567 
3568    Input Parameters:
3569 +  comm - MPI communicator
3570 .  m - number of local rows (Cannot be PETSC_DECIDE)
3571 .  n - This value should be the same as the local size used in creating the
3572        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3573        calculated if N is given) For square matrices n is almost always m.
3574 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3575 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3576 .   i - row indices
3577 .   j - column indices
3578 -   a - matrix values
3579 
3580    Output Parameter:
3581 .   mat - the matrix
3582 
3583    Level: intermediate
3584 
3585    Notes:
3586        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3587      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3588      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3589 
3590        The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3591 
3592        The format used for the sparse matrix input is equivalent to a
3593     row-major ordering, i.e., for the following matrix the expected input data is
3594     as shown:
3595 
3596         1 0 0
3597         2 0 3     P0
3598        -------
3599         4 5 6     P1
3600 
3601      Process0 [P0]: rows_owned=[0,1]
3602         i =  {0,1,3}  [size = nrow+1  = 2+1]
3603         j =  {0,0,2}  [size = nz = 3]
3604         v =  {1,2,3}  [size = nz = 3]
3605 
3606      Process1 [P1]: rows_owned=[2]
3607         i =  {0,3}    [size = nrow+1  = 1+1]
3608         j =  {0,1,2}  [size = nz = 3]
3609         v =  {4,5,6}  [size = nz = 3]
3610 
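     For example, process 0 above could create the matrix with the following sketch (the arrays are
     those listed for P0; the global column count N=3 is taken from the example, and comm/A are
     assumed to be declared by the caller):
.vb
        PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve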
3611 .keywords: matrix, aij, compressed row, sparse, parallel
3612 
3613 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3614           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3615 @*/
3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3617 {
3618   PetscErrorCode ierr;
3619 
3620   PetscFunctionBegin;
3621   if (i[0]) {
3622     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3623   }
3624   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3625   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3626   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3627   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3628   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 #undef __FUNCT__
3633 #define __FUNCT__ "MatCreateMPIAIJ"
3634 /*@C
3635    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3636    (the default parallel PETSc format).  For good matrix assembly performance
3637    the user should preallocate the matrix storage by setting the parameters
3638    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3639    performance can be increased by more than a factor of 50.
3640 
3641    Collective on MPI_Comm
3642 
3643    Input Parameters:
3644 +  comm - MPI communicator
3645 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3646            This value should be the same as the local size used in creating the
3647            y vector for the matrix-vector product y = Ax.
3648 .  n - This value should be the same as the local size used in creating the
3649        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3650        calculated if N is given) For square matrices n is almost always m.
3651 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3652 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3653 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3654            (same value is used for all local rows)
3655 .  d_nnz - array containing the number of nonzeros in the various rows of the
3656            DIAGONAL portion of the local submatrix (possibly different for each row)
3657            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3658            The size of this array is equal to the number of local rows, i.e 'm'.
3659            You must leave room for the diagonal entry even if it is zero.
3660 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3661            submatrix (same value is used for all local rows).
3662 -  o_nnz - array containing the number of nonzeros in the various rows of the
3663            OFF-DIAGONAL portion of the local submatrix (possibly different for
3664            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3665            structure. The size of this array is equal to the number
3666            of local rows, i.e 'm'.
3667 
3668    Output Parameter:
3669 .  A - the matrix
3670 
3671    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3672    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3673    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3674 
3675    Notes:
3676    If the *_nnz parameter is given then the *_nz parameter is ignored
3677 
3678    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3679    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3680    storage requirements for this matrix.
3681 
3682    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3683    processor then it must be used on all processors that share the object for
3684    that argument.
3685 
3686    The user MUST specify either the local or global matrix dimensions
3687    (possibly both).
3688 
3689    The parallel matrix is partitioned across processors such that the
3690    first m0 rows belong to process 0, the next m1 rows belong to
3691    process 1, the next m2 rows belong to process 2 etc.. where
3692    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3693    values corresponding to [m x N] submatrix.
3694 
3695    The columns are logically partitioned with the n0 columns belonging
3696    to 0th partition, the next n1 columns belonging to the next
3697    partition, etc., where n0,n1,n2... are the input parameter 'n'.
3698 
3699    The DIAGONAL portion of the local submatrix on any given processor
3700    is the submatrix corresponding to the rows and columns m,n
3701    owned by the given processor, i.e., the diagonal matrix on
3702    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3703    etc. The remaining portion of the local submatrix [m x (N-n)]
3704    constitute the OFF-DIAGONAL portion. The example below better
3705    illustrates this concept.
3706 
3707    For a square global matrix we define each processor's diagonal portion
3708    to be its local rows and the corresponding columns (a square submatrix);
3709    each processor's off-diagonal portion encompasses the remainder of the
3710    local matrix (a rectangular submatrix).
3711 
3712    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3713 
3714    When calling this routine with a single process communicator, a matrix of
3715    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3716    type of communicator, use the construction mechanism:
3717      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3718 
3719    By default, this format uses inodes (identical nodes) when possible.
3720    We search for consecutive rows with the same nonzero structure, thereby
3721    reusing matrix information to achieve increased efficiency.
3722 
3723    Options Database Keys:
3724 +  -mat_no_inode  - Do not use inodes
3725 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3726 -  -mat_aij_oneindex - Internally use indexing starting at 1
3727         rather than 0.  Note that when calling MatSetValues(),
3728         the user still MUST index entries starting at 0!
3729 
3730 
3731    Example usage:
3732 
3733    Consider the following 8x8 matrix with 34 non-zero values, that is
3734    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3735    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3736    as follows:
3737 
3738 .vb
3739             1  2  0  |  0  3  0  |  0  4
3740     Proc0   0  5  6  |  7  0  0  |  8  0
3741             9  0 10  | 11  0  0  | 12  0
3742     -------------------------------------
3743            13  0 14  | 15 16 17  |  0  0
3744     Proc1   0 18  0  | 19 20 21  |  0  0
3745             0  0  0  | 22 23  0  | 24  0
3746     -------------------------------------
3747     Proc2  25 26 27  |  0  0 28  | 29  0
3748            30  0  0  | 31 32 33  |  0 34
3749 .ve
3750 
3751    This can be represented as a collection of submatrices as:
3752 
3753 .vb
3754       A B C
3755       D E F
3756       G H I
3757 .ve
3758 
3759    Where the submatrices A,B,C are owned by proc0, D,E,F are
3760    owned by proc1, G,H,I are owned by proc2.
3761 
3762    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3763    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3764    The 'M','N' parameters are 8,8, and have the same values on all procs.
3765 
3766    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3767    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3768    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3769    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3770    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3771    matrix, and [DF] as another SeqAIJ matrix.
3772 
3773    When d_nz, o_nz parameters are specified, d_nz storage elements are
3774    allocated for every row of the local diagonal submatrix, and o_nz
3775    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3776    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3777    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3778    In this case, the values of d_nz,o_nz are:
3779 .vb
3780      proc0 : d_nz = 2, o_nz = 2
3781      proc1 : d_nz = 3, o_nz = 2
3782      proc2 : d_nz = 1, o_nz = 4
3783 .ve
3784    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3785    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3786    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3787    34 values.
3788 
3789    When d_nnz, o_nnz parameters are specified, the storage is specified
3790    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3791    In the above case the values for d_nnz,o_nnz are:
3792 .vb
3793      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3794      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3795      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3796 .ve
3797    Here the space allocated is the sum of all the above values, i.e., 34, and
3798    hence pre-allocation is perfect.
3799 
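   With the d_nnz/o_nnz values above, proc0 would create the matrix with a call like the following
   sketch (the other processes pass their own local sizes and arrays; comm and A are assumed to be
   declared by the caller):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve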
3800    Level: intermediate
3801 
3802 .keywords: matrix, aij, compressed row, sparse, parallel
3803 
3804 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3805           MPIAIJ, MatCreateMPIAIJWithArrays()
3806 @*/
3807 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3808 {
3809   PetscErrorCode ierr;
3810   PetscMPIInt    size;
3811 
3812   PetscFunctionBegin;
3813   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3814   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3815   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3816   if (size > 1) {
3817     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3818     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3819   } else {
3820     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3821     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3822   }
3823   PetscFunctionReturn(0);
3824 }
3825 
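/*
   MatMPIAIJGetSeqAIJ - Gives access to the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of an
   MPIAIJ matrix and to the garray map from local columns of Ao to global column numbers.

   A sketch of typical use:
     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/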
3826 #undef __FUNCT__
3827 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3828 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3829 {
3830   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3831 
3832   PetscFunctionBegin;
3833   *Ad     = a->A;
3834   *Ao     = a->B;
3835   *colmap = a->garray;
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3841 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3842 {
3843   PetscErrorCode ierr;
3844   PetscInt       i;
3845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3846 
3847   PetscFunctionBegin;
3848   if (coloring->ctype == IS_COLORING_GLOBAL) {
3849     ISColoringValue *allcolors,*colors;
3850     ISColoring      ocoloring;
3851 
3852     /* set coloring for diagonal portion */
3853     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3854 
3855     /* set coloring for off-diagonal portion */
3856     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3857     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3858     for (i=0; i<a->B->cmap->n; i++) {
3859       colors[i] = allcolors[a->garray[i]];
3860     }
3861     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3862     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3863     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3864     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3865   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3866     ISColoringValue *colors;
3867     PetscInt        *larray;
3868     ISColoring      ocoloring;
3869 
3870     /* set coloring for diagonal portion */
3871     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3872     for (i=0; i<a->A->cmap->n; i++) {
3873       larray[i] = i + A->cmap->rstart;
3874     }
3875     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3876     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3877     for (i=0; i<a->A->cmap->n; i++) {
3878       colors[i] = coloring->colors[larray[i]];
3879     }
3880     ierr = PetscFree(larray);CHKERRQ(ierr);
3881     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3882     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3883     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3884 
3885     /* set coloring for off-diagonal portion */
3886     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3887     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3888     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3889     for (i=0; i<a->B->cmap->n; i++) {
3890       colors[i] = coloring->colors[larray[i]];
3891     }
3892     ierr = PetscFree(larray);CHKERRQ(ierr);
3893     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3894     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3895     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3896   } else {
3897     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3898   }
3899 
3900   PetscFunctionReturn(0);
3901 }
3902 
3903 #if defined(PETSC_HAVE_ADIC)
3904 #undef __FUNCT__
3905 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3906 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3907 {
3908   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3909   PetscErrorCode ierr;
3910 
3911   PetscFunctionBegin;
3912   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3913   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 #endif
3917 
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3920 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3921 {
3922   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3923   PetscErrorCode ierr;
3924 
3925   PetscFunctionBegin;
3926   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3927   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3928   PetscFunctionReturn(0);
3929 }
3930 
3931 #undef __FUNCT__
3932 #define __FUNCT__ "MatMerge"
3933 /*@
3934       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3935                  matrices from each processor
3936 
3937     Collective on MPI_Comm
3938 
3939    Input Parameters:
3940 +    comm - the communicator the parallel matrix will live on
3941 .    inmat - the input sequential matrix on each process
3942 .    n - number of local columns (or PETSC_DECIDE)
3943 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3944 
3945    Output Parameter:
3946 .    outmat - the parallel matrix generated
3947 
3948     Level: advanced
3949 
3950    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3951 
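   A sketch of a typical call (inmat is the sequential matrix built on each process; note that
   this routine destroys inmat):
.vb
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve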
3952 @*/
3953 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3954 {
3955   PetscErrorCode ierr;
3956   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3957   PetscInt       *indx;
3958   PetscScalar    *values;
3959 
3960   PetscFunctionBegin;
3961   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3962   if (scall == MAT_INITIAL_MATRIX){
3963     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3964     if (n == PETSC_DECIDE){
3965       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3966     }
3967     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3968     rstart -= m;
3969 
3970     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3971     for (i=0;i<m;i++) {
3972       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3973       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3974       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3975     }
3976     /* This routine will ONLY return MPIAIJ type matrix */
3977     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3978     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3979     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3980     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3981     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3982 
3983   } else if (scall == MAT_REUSE_MATRIX){
3984     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3985   } else {
3986     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3987   }
3988 
3989   for (i=0;i<m;i++) {
3990     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3991     Ii    = i + rstart;
3992     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3993     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3994   }
3995   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3996   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3997   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3998 
3999   PetscFunctionReturn(0);
4000 }
4001 
4002 #undef __FUNCT__
4003 #define __FUNCT__ "MatFileSplit"
4004 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4005 {
4006   PetscErrorCode    ierr;
4007   PetscMPIInt       rank;
4008   PetscInt          m,N,i,rstart,nnz;
4009   size_t            len;
4010   const PetscInt    *indx;
4011   PetscViewer       out;
4012   char              *name;
4013   Mat               B;
4014   const PetscScalar *values;
4015 
4016   PetscFunctionBegin;
4017   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4018   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4019   /* Should this be the type of the diagonal block of A? */
4020   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4021   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4022   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4023   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4024   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4025   for (i=0;i<m;i++) {
4026     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4027     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4028     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4029   }
4030   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4031   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4032 
4033   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4034   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4035   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4036   sprintf(name,"%s.%d",outfile,rank);
4037   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4038   ierr = PetscFree(name);
4039   ierr = MatView(B,out);CHKERRQ(ierr);
4040   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4041   ierr = MatDestroy(B);CHKERRQ(ierr);
4042   PetscFunctionReturn(0);
4043 }
4044 
4045 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4048 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4049 {
4050   PetscErrorCode       ierr;
4051   Mat_Merge_SeqsToMPI  *merge;
4052   PetscContainer       container;
4053 
4054   PetscFunctionBegin;
4055   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4056   if (container) {
4057     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4058     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4059     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4060     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4061     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4062     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4063     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4064     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4065     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4066     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4067     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4068     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4069     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4070     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4071 
4072     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4073     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4074   }
4075   ierr = PetscFree(merge);CHKERRQ(ierr);
4076 
4077   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4078   PetscFunctionReturn(0);
4079 }
4080 
4081 #include "../src/mat/utils/freespace.h"
4082 #include "petscbt.h"
4083 
4084 #undef __FUNCT__
4085 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4086 /*@C
4087       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
4088                  matrices from each process
4089 
4090     Collective on MPI_Comm
4091 
4092    Input Parameters:
4093 +    comm - the communicator the parallel matrix will live on
4094 .    seqmat - the input sequential matrix (one per process)
4095 .    m - number of local rows (or PETSC_DECIDE)
4096 .    n - number of local columns (or PETSC_DECIDE)
4097 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4098 
4099    Output Parameter:
4100 .    mpimat - the parallel matrix generated
4101 
4102     Level: advanced
4103 
4104    Notes:
4105      The dimensions of the sequential matrix on each process MUST be the same.
4106      The input seqmat is included in the container composed with the name "MatMergeSeqsToMPI", and will be
4107      destroyed when mpimat is destroyed. Call PetscObjectQuery() with that name to access seqmat.
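
   Example usage (a minimal sketch; seqA denotes the assembled SeqAIJ matrix built by
   every process, and mlocal/nlocal are illustrative placeholders for the chosen local
   row and column sizes):
.vb
      Mat seqA,C;
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,mlocal,nlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... update the numerical values of seqA, keeping the same nonzero pattern ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,mlocal,nlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve
   The first call performs the symbolic and numeric phases; the second reuses the stored
   symbolic information and only refreshes the numerical values.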
4108 @*/
4109 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4110 {
4111   PetscErrorCode       ierr;
4112   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4113   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4114   PetscMPIInt          size,rank,taga,*len_s;
4115   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4116   PetscInt             proc,m;
4117   PetscInt             **buf_ri,**buf_rj;
4118   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4119   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4120   MPI_Request          *s_waits,*r_waits;
4121   MPI_Status           *status;
4122   MatScalar            *aa=a->a;
4123   MatScalar            **abuf_r,*ba_i;
4124   Mat_Merge_SeqsToMPI  *merge;
4125   PetscContainer       container;
4126 
4127   PetscFunctionBegin;
4128   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4129 
4130   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4131   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4132 
4133   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4134   if (container) {
4135     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4136   }
4137   bi     = merge->bi;
4138   bj     = merge->bj;
4139   buf_ri = merge->buf_ri;
4140   buf_rj = merge->buf_rj;
4141 
4142   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4143   owners = merge->rowmap->range;
4144   len_s  = merge->len_s;
4145 
4146   /* send and recv matrix values */
4147   /*-----------------------------*/
4148   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4149   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4150 
4151   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4152   for (proc=0,k=0; proc<size; proc++){
4153     if (!len_s[proc]) continue;
4154     i = owners[proc];
4155     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4156     k++;
4157   }
4158 
4159   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4160   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4161   ierr = PetscFree(status);CHKERRQ(ierr);
4162 
4163   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4164   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4165 
4166   /* insert mat values of mpimat */
4167   /*----------------------------*/
4168   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4169   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4170 
4171   for (k=0; k<merge->nrecv; k++){
4172     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4173     nrows = *(buf_ri_k[k]);
4174     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4175     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4176   }
4177 
4178   /* set values of ba */
4179   m = merge->rowmap->n;
4180   for (i=0; i<m; i++) {
4181     arow = owners[rank] + i;
4182     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4183     bnzi = bi[i+1] - bi[i];
4184     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4185 
4186     /* add local non-zero vals of this proc's seqmat into ba */
4187     anzi = ai[arow+1] - ai[arow];
4188     aj   = a->j + ai[arow];
4189     aa   = a->a + ai[arow];
4190     nextaj = 0;
4191     for (j=0; nextaj<anzi; j++){
4192       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4193         ba_i[j] += aa[nextaj++];
4194       }
4195     }
4196 
4197     /* add received vals into ba */
4198     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4199       /* i-th row */
4200       if (i == *nextrow[k]) {
4201         anzi = *(nextai[k]+1) - *nextai[k];
4202         aj   = buf_rj[k] + *(nextai[k]);
4203         aa   = abuf_r[k] + *(nextai[k]);
4204         nextaj = 0;
4205         for (j=0; nextaj<anzi; j++){
4206           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4207             ba_i[j] += aa[nextaj++];
4208           }
4209         }
4210         nextrow[k]++; nextai[k]++;
4211       }
4212     }
4213     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4214   }
4215   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4216   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4217 
4218   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4219   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4220   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4221   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4222   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 #undef __FUNCT__
4227 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4228 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4229 {
4230   PetscErrorCode       ierr;
4231   Mat                  B_mpi;
4232   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4233   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4234   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4235   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4236   PetscInt             len,proc,*dnz,*onz;
4237   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4238   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4239   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4240   MPI_Status           *status;
4241   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4242   PetscBT              lnkbt;
4243   Mat_Merge_SeqsToMPI  *merge;
4244   PetscContainer       container;
4245 
4246   PetscFunctionBegin;
4247   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4248 
4249   /* make sure it is a PETSc comm */
4250   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4253 
4254   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4255   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4256 
4257   /* determine row ownership */
4258   /*---------------------------------------------------------*/
4259   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4260   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4261   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4262   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4263   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4264   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4265   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4266 
4267   m      = merge->rowmap->n;
4268   M      = merge->rowmap->N;
4269   owners = merge->rowmap->range;
4270 
4271   /* determine the number of messages to send, their lengths */
4272   /*---------------------------------------------------------*/
4273   len_s  = merge->len_s;
4274 
4275   len = 0;  /* length of buf_si[] */
4276   merge->nsend = 0;
4277   for (proc=0; proc<size; proc++){
4278     len_si[proc] = 0;
4279     if (proc == rank){
4280       len_s[proc] = 0;
4281     } else {
4282       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4283       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4284     }
4285     if (len_s[proc]) {
4286       merge->nsend++;
4287       nrows = 0;
4288       for (i=owners[proc]; i<owners[proc+1]; i++){
4289         if (ai[i+1] > ai[i]) nrows++;
4290       }
4291       len_si[proc] = 2*(nrows+1);
4292       len += len_si[proc];
4293     }
4294   }
4295 
4296   /* determine the number and length of messages to receive for ij-structure */
4297   /*-------------------------------------------------------------------------*/
4298   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4299   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4300 
4301   /* post the Irecv of j-structure */
4302   /*-------------------------------*/
4303   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4304   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4305 
4306   /* post the Isend of j-structure */
4307   /*--------------------------------*/
4308   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4309 
4310   for (proc=0, k=0; proc<size; proc++){
4311     if (!len_s[proc]) continue;
4312     i = owners[proc];
4313     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4314     k++;
4315   }
4316 
4317   /* receives and sends of j-structure are complete */
4318   /*------------------------------------------------*/
4319   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4320   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4321 
4322   /* send and recv i-structure */
4323   /*---------------------------*/
4324   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4325   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4326 
4327   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4328   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4329   for (proc=0,k=0; proc<size; proc++){
4330     if (!len_s[proc]) continue;
4331     /* form outgoing message for i-structure:
4332          buf_si[0]:                 nrows to be sent
4333                [1:nrows]:           row index (local to the receiving process)
4334                [nrows+1:2*nrows+1]: i-structure index
4335     */
4336     /*-------------------------------------------*/
4337     nrows = len_si[proc]/2 - 1;
4338     buf_si_i    = buf_si + nrows+1;
4339     buf_si[0]   = nrows;
4340     buf_si_i[0] = 0;
4341     nrows = 0;
4342     for (i=owners[proc]; i<owners[proc+1]; i++){
4343       anzi = ai[i+1] - ai[i];
4344       if (anzi) {
4345         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4346         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4347         nrows++;
4348       }
4349     }
4350     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4351     k++;
4352     buf_si += len_si[proc];
4353   }
4354 
4355   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4356   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4357 
4358   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4359   for (i=0; i<merge->nrecv; i++){
4360     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4361   }
4362 
4363   ierr = PetscFree(len_si);CHKERRQ(ierr);
4364   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4365   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4366   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4367   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4368   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4369   ierr = PetscFree(status);CHKERRQ(ierr);
4370 
4371   /* compute a local seq matrix in each processor */
4372   /*----------------------------------------------*/
4373   /* allocate bi array and free space for accumulating nonzero column info */
4374   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4375   bi[0] = 0;
4376 
4377   /* create and initialize a linked list */
4378   nlnk = N+1;
4379   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4380 
4381   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4382   len = 0;
4383   len  = ai[owners[rank+1]] - ai[owners[rank]];
4384   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4385   current_space = free_space;
4386 
4387   /* determine symbolic info for each local row */
4388   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4389 
4390   for (k=0; k<merge->nrecv; k++){
4391     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4392     nrows = *buf_ri_k[k];
4393     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4394     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4395   }
4396 
4397   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4398   len = 0;
4399   for (i=0;i<m;i++) {
4400     bnzi   = 0;
4401     /* add local non-zero cols of this proc's seqmat into lnk */
4402     arow   = owners[rank] + i;
4403     anzi   = ai[arow+1] - ai[arow];
4404     aj     = a->j + ai[arow];
4405     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4406     bnzi += nlnk;
4407     /* add received col data into lnk */
4408     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4409       if (i == *nextrow[k]) { /* i-th row */
4410         anzi = *(nextai[k]+1) - *nextai[k];
4411         aj   = buf_rj[k] + *nextai[k];
4412         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4413         bnzi += nlnk;
4414         nextrow[k]++; nextai[k]++;
4415       }
4416     }
4417     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4418 
4419     /* if free space is not available, make more free space */
4420     if (current_space->local_remaining<bnzi) {
4421       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4422       nspacedouble++;
4423     }
4424     /* copy data into free space, then initialize lnk */
4425     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4426     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4427 
4428     current_space->array           += bnzi;
4429     current_space->local_used      += bnzi;
4430     current_space->local_remaining -= bnzi;
4431 
4432     bi[i+1] = bi[i] + bnzi;
4433   }
4434 
4435   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4436 
4437   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4438   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4439   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4440 
4441   /* create symbolic parallel matrix B_mpi */
4442   /*---------------------------------------*/
4443   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4444   if (n==PETSC_DECIDE) {
4445     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4446   } else {
4447     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4448   }
4449   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4450   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4451   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4452 
4453   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4454   B_mpi->assembled     = PETSC_FALSE;
4455   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4456   merge->bi            = bi;
4457   merge->bj            = bj;
4458   merge->buf_ri        = buf_ri;
4459   merge->buf_rj        = buf_rj;
4460   merge->coi           = PETSC_NULL;
4461   merge->coj           = PETSC_NULL;
4462   merge->owners_co     = PETSC_NULL;
4463 
4464   /* attach the supporting struct to B_mpi for reuse */
4465   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4466   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4467   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4468   *mpimat = B_mpi;
4469 
4470   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4471   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4472   PetscFunctionReturn(0);
4473 }
4474 
4475 #undef __FUNCT__
4476 #define __FUNCT__ "MatMerge_SeqsToMPI"
4477 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4478 {
4479   PetscErrorCode   ierr;
4480 
4481   PetscFunctionBegin;
4482   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4483   if (scall == MAT_INITIAL_MATRIX){
4484     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4485   }
4486   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4487   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 #undef __FUNCT__
4492 #define __FUNCT__ "MatGetLocalMat"
4493 /*@
4494      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel MPIAIJ matrix
4495 
4496     Not Collective
4497 
4498    Input Parameters:
4499 +    A - the matrix
4500 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4501 
4502    Output Parameter:
4503 .    A_loc - the local sequential matrix generated
4504 
4505     Level: developer
4506 
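   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... use A_loc, which holds this process's rows of A with global column indices ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
   The MAT_REUSE_MATRIX call only recopies the numerical values, so it is valid only
   while the nonzero pattern of A stays the same.
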
4507 @*/
4508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4509 {
4510   PetscErrorCode  ierr;
4511   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4512   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4513   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4514   MatScalar       *aa=a->a,*ba=b->a,*cam;
4515   PetscScalar     *ca;
4516   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4517   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4518 
4519   PetscFunctionBegin;
4520   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4521   if (scall == MAT_INITIAL_MATRIX){
4522     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4523     ci[0] = 0;
4524     for (i=0; i<am; i++){
4525       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4526     }
4527     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4528     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4529     k = 0;
4530     for (i=0; i<am; i++) {
4531       ncols_o = bi[i+1] - bi[i];
4532       ncols_d = ai[i+1] - ai[i];
4533       /* off-diagonal portion of A */
4534       for (jo=0; jo<ncols_o; jo++) {
4535         col = cmap[*bj];
4536         if (col >= cstart) break;
4537         cj[k]   = col; bj++;
4538         ca[k++] = *ba++;
4539       }
4540       /* diagonal portion of A */
4541       for (j=0; j<ncols_d; j++) {
4542         cj[k]   = cstart + *aj++;
4543         ca[k++] = *aa++;
4544       }
4545       /* off-diagonal portion of A */
4546       for (j=jo; j<ncols_o; j++) {
4547         cj[k]   = cmap[*bj++];
4548         ca[k++] = *ba++;
4549       }
4550     }
4551     /* put together the new matrix */
4552     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4553     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4554     /* Since these are PETSc arrays, change flags to free them as necessary. */
4555     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4556     mat->free_a  = PETSC_TRUE;
4557     mat->free_ij = PETSC_TRUE;
4558     mat->nonew   = 0;
4559   } else if (scall == MAT_REUSE_MATRIX){
4560     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4561     ci = mat->i; cj = mat->j; cam = mat->a;
4562     for (i=0; i<am; i++) {
4563       /* off-diagonal portion of A */
4564       ncols_o = bi[i+1] - bi[i];
4565       for (jo=0; jo<ncols_o; jo++) {
4566         col = cmap[*bj];
4567         if (col >= cstart) break;
4568         *cam++ = *ba++; bj++;
4569       }
4570       /* diagonal portion of A */
4571       ncols_d = ai[i+1] - ai[i];
4572       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4573       /* off-diagonal portion of A */
4574       for (j=jo; j<ncols_o; j++) {
4575         *cam++ = *ba++; bj++;
4576       }
4577     }
4578   } else {
4579     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4580   }
4581 
4582   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4583   PetscFunctionReturn(0);
4584 }
4585 
4586 #undef __FUNCT__
4587 #define __FUNCT__ "MatGetLocalMatCondensed"
4588 /*@C
4589      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel MPIAIJ matrix
4590 
4591     Not Collective
4592 
4593    Input Parameters:
4594 +    A - the matrix
4595 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4596 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4597 
4598    Output Parameter:
4599 .    A_loc - the local sequential matrix generated
4600 
4601     Level: developer
4602 
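   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix, and
   PETSC_NULL is passed for row and col so the routine builds the index sets itself):
.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
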
4603 @*/
4604 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4605 {
4606   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4607   PetscErrorCode    ierr;
4608   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4609   IS                isrowa,iscola;
4610   Mat               *aloc;
4611 
4612   PetscFunctionBegin;
4613   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4614   if (!row){
4615     start = A->rmap->rstart; end = A->rmap->rend;
4616     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4617   } else {
4618     isrowa = *row;
4619   }
4620   if (!col){
4621     start = A->cmap->rstart;
4622     cmap  = a->garray;
4623     nzA   = a->A->cmap->n;
4624     nzB   = a->B->cmap->n;
4625     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4626     ncols = 0;
4627     for (i=0; i<nzB; i++) {
4628       if (cmap[i] < start) idx[ncols++] = cmap[i];
4629       else break;
4630     }
4631     imark = i;
4632     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4633     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4634     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4635     ierr = PetscFree(idx);CHKERRQ(ierr);
4636   } else {
4637     iscola = *col;
4638   }
4639   if (scall != MAT_INITIAL_MATRIX){
4640     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4641     aloc[0] = *A_loc;
4642   }
4643   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4644   *A_loc = aloc[0];
4645   ierr = PetscFree(aloc);CHKERRQ(ierr);
4646   if (!row){
4647     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4648   }
4649   if (!col){
4650     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4651   }
4652   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4653   PetscFunctionReturn(0);
4654 }
4655 
4656 #undef __FUNCT__
4657 #define __FUNCT__ "MatGetBrowsOfAcols"
4658 /*@C
4659     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4660 
4661     Collective on Mat
4662 
4663    Input Parameters:
4664 +    A,B - the matrices in mpiaij format
4665 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4666 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4667 
4668    Output Parameter:
4669 +    rowb, colb - index sets of rows and columns of B to extract
4670 .    brstart - row index in B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq come from B's local rows)
4671 -    B_seq - the sequential matrix generated
4672 
4673     Level: developer
4674 
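   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices
   with compatible layouts):
.vb
      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... after the values of B change, reuse the saved index sets and B_seq ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
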
4675 @*/
4676 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4677 {
4678   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4679   PetscErrorCode    ierr;
4680   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4681   IS                isrowb,iscolb;
4682   Mat               *bseq;
4683 
4684   PetscFunctionBegin;
4685   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4686     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4687   }
4688   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4689 
4690   if (scall == MAT_INITIAL_MATRIX){
4691     start = A->cmap->rstart;
4692     cmap  = a->garray;
4693     nzA   = a->A->cmap->n;
4694     nzB   = a->B->cmap->n;
4695     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4696     ncols = 0;
4697     for (i=0; i<nzB; i++) {  /* row < local row index */
4698       if (cmap[i] < start) idx[ncols++] = cmap[i];
4699       else break;
4700     }
4701     imark = i;
4702     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4703     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4704     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4705     ierr = PetscFree(idx);CHKERRQ(ierr);
4706     *brstart = imark;
4707     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4708   } else {
4709     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4710     isrowb = *rowb; iscolb = *colb;
4711     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4712     bseq[0] = *B_seq;
4713   }
4714   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4715   *B_seq = bseq[0];
4716   ierr = PetscFree(bseq);CHKERRQ(ierr);
4717   if (!rowb){
4718     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4719   } else {
4720     *rowb = isrowb;
4721   }
4722   if (!colb){
4723     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4724   } else {
4725     *colb = iscolb;
4726   }
4727   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4728   PetscFunctionReturn(0);
4729 }
4730 
4731 #undef __FUNCT__
4732 #define __FUNCT__ "MatGetBrowsOfAoCols"
4733 /*@C
4734     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4735     of the OFF-DIAGONAL portion of local A
4736 
4737     Collective on Mat
4738 
4739    Input Parameters:
4740 +    A,B - the matrices in mpiaij format
4741 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4742 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4743 .    startsj_r - similar to startsj for receives
4744 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4745 
4746    Output Parameter:
4747 +    B_oth - the sequential matrix generated
4748 
4749     Level: developer
4750 
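   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices
   with compatible layouts):
.vb
      Mat       B_oth;
      PetscInt  *startsj,*startsj_r;
      MatScalar *bufa;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... after the values of B change, the saved buffers let the reuse call refresh B_oth ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
.ve
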
4751 @*/
4752 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4753 {
4754   VecScatter_MPI_General *gen_to,*gen_from;
4755   PetscErrorCode         ierr;
4756   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4757   Mat_SeqAIJ             *b_oth;
4758   VecScatter             ctx=a->Mvctx;
4759   MPI_Comm               comm=((PetscObject)ctx)->comm;
4760   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4761   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4762   PetscScalar            *rvalues,*svalues;
4763   MatScalar              *b_otha,*bufa,*bufA;
4764   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4765   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4766   MPI_Status             *sstatus,rstatus;
4767   PetscMPIInt            jj;
4768   PetscInt               *cols,sbs,rbs;
4769   PetscScalar            *vals;
4770 
4771   PetscFunctionBegin;
4772   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4773     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4774   }
4775   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4776   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4777 
4778   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4779   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4780   rvalues  = gen_from->values; /* holds the length of receiving row */
4781   svalues  = gen_to->values;   /* holds the length of sending row */
4782   nrecvs   = gen_from->n;
4783   nsends   = gen_to->n;
4784 
4785   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4786   srow     = gen_to->indices;   /* local row index to be sent */
4787   sstarts  = gen_to->starts;
4788   sprocs   = gen_to->procs;
4789   sstatus  = gen_to->sstatus;
4790   sbs      = gen_to->bs;
4791   rstarts  = gen_from->starts;
4792   rprocs   = gen_from->procs;
4793   rbs      = gen_from->bs;
4794 
4795   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4796   if (scall == MAT_INITIAL_MATRIX){
4797     /* i-array */
4798     /*---------*/
4799     /*  post receives */
4800     for (i=0; i<nrecvs; i++){
4801       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4802       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4803       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4804     }
4805 
4806     /* pack the outgoing message */
4807     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4808     sstartsj[0] = 0;  rstartsj[0] = 0;
4809     len = 0; /* total length of j or a array to be sent */
4810     k = 0;
4811     for (i=0; i<nsends; i++){
4812       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4813       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4814       for (j=0; j<nrows; j++) {
4815         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4816         for (l=0; l<sbs; l++){
4817           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4818           rowlen[j*sbs+l] = ncols;
4819           len += ncols;
4820           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4821         }
4822         k++;
4823       }
4824       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4825       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4826     }
4827     /* recvs and sends of i-array are completed */
4828     i = nrecvs;
4829     while (i--) {
4830       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4831     }
4832     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4833 
4834     /* allocate buffers for sending j and a arrays */
4835     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4836     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4837 
4838     /* create i-array of B_oth */
4839     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4840     b_othi[0] = 0;
4841     len = 0; /* total length of j or a array to be received */
4842     k = 0;
4843     for (i=0; i<nrecvs; i++){
4844       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4845       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4846       for (j=0; j<nrows; j++) {
4847         b_othi[k+1] = b_othi[k] + rowlen[j];
4848         len += rowlen[j]; k++;
4849       }
4850       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4851     }
4852 
4853     /* allocate space for j and a arrays of B_oth */
4854     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4855     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4856 
4857     /* j-array */
4858     /*---------*/
4859     /*  post receives of j-array */
4860     for (i=0; i<nrecvs; i++){
4861       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4862       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4863     }
4864 
4865     /* pack the outgoing message j-array */
4866     k = 0;
4867     for (i=0; i<nsends; i++){
4868       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4869       bufJ = bufj+sstartsj[i];
4870       for (j=0; j<nrows; j++) {
4871         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4872         for (ll=0; ll<sbs; ll++){
4873           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4874           for (l=0; l<ncols; l++){
4875             *bufJ++ = cols[l];
4876           }
4877           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4878         }
4879       }
4880       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4881     }
4882 
4883     /* recvs and sends of j-array are completed */
4884     i = nrecvs;
4885     while (i--) {
4886       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4887     }
4888     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4889   } else if (scall == MAT_REUSE_MATRIX){
4890     sstartsj = *startsj;
4891     rstartsj = *startsj_r;
4892     bufa     = *bufa_ptr;
4893     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4894     b_otha   = b_oth->a;
4895   } else {
4896     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4897   }
4898 
4899   /* a-array */
4900   /*---------*/
4901   /*  post receives of a-array */
4902   for (i=0; i<nrecvs; i++){
4903     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4904     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4905   }
4906 
4907   /* pack the outgoing message a-array */
4908   k = 0;
4909   for (i=0; i<nsends; i++){
4910     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4911     bufA = bufa+sstartsj[i];
4912     for (j=0; j<nrows; j++) {
4913       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4914       for (ll=0; ll<sbs; ll++){
4915         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4916         for (l=0; l<ncols; l++){
4917           *bufA++ = vals[l];
4918         }
4919         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4920       }
4921     }
4922     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4923   }
4924   /* recvs and sends of a-array are completed */
4925   i = nrecvs;
4926   while (i--) {
4927     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4928   }
4929   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4930   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4931 
4932   if (scall == MAT_INITIAL_MATRIX){
4933     /* put together the new matrix */
4934     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4935 
4936     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4937     /* Since these are PETSc arrays, change flags to free them as necessary. */
4938     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4939     b_oth->free_a  = PETSC_TRUE;
4940     b_oth->free_ij = PETSC_TRUE;
4941     b_oth->nonew   = 0;
4942 
4943     ierr = PetscFree(bufj);CHKERRQ(ierr);
4944     if (!startsj || !bufa_ptr){
4945       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4946       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4947     } else {
4948       *startsj   = sstartsj;
4949       *startsj_r = rstartsj;
4950       *bufa_ptr  = bufa;
4951     }
4952   }
4953   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4954   PetscFunctionReturn(0);
4955 }
4956 
4957 #undef __FUNCT__
4958 #define __FUNCT__ "MatGetCommunicationStructs"
4959 /*@C
4960   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4961 
4962   Not Collective
4963 
4964   Input Parameters:
4965 . A - The matrix in mpiaij format
4966 
4967   Output Parameter:
4968 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4969 . colmap - A map from global column index to local index into lvec
4970 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4971 
4972   Level: developer
4973 
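  Example usage (a minimal sketch for a build without PETSC_USE_CTABLE; A is assumed to
  be an assembled MPIAIJ matrix, and the returned objects are references owned by A, so
  the caller must not destroy them):
.vb
      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
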
4974 @*/
4975 #if defined (PETSC_USE_CTABLE)
4976 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4977 #else
4978 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4979 #endif
4980 {
4981   Mat_MPIAIJ *a;
4982 
4983   PetscFunctionBegin;
4984   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4985   PetscValidPointer(lvec, 2);
4986   PetscValidPointer(colmap, 3);
4987   PetscValidPointer(multScatter, 4);
4988   a = (Mat_MPIAIJ *) A->data;
4989   if (lvec) *lvec = a->lvec;
4990   if (colmap) *colmap = a->colmap;
4991   if (multScatter) *multScatter = a->Mvctx;
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 EXTERN_C_BEGIN
4996 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4997 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4998 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4999 EXTERN_C_END
5000 
5001 #undef __FUNCT__
5002 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5003 /*
5004     Computes C = A*B as (B'*A')', since forming A*B directly (MPIDense times MPIAIJ) is untenable
5005 
5006                n                       p                          p
5007         (              )       (              )         (                  )
5008       m (      A       )  *  n (       B      )   =   m (         C        )
5009         (              )       (              )         (                  )
5010 
5011 */
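/*
   A hedged usage note: these dense*aij kernels are normally reached through the generic
   interface rather than called directly; the names A_dense, B_aij, and C below are
   illustrative only.

      ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   Here A_dense is of type MPIDENSE and B_aij is of type MPIAIJ.
*/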
5012 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5013 {
5014   PetscErrorCode     ierr;
5015   Mat                At,Bt,Ct;
5016 
5017   PetscFunctionBegin;
5018   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5019   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5020   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5021   ierr = MatDestroy(At);CHKERRQ(ierr);
5022   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5023   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5024   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 #undef __FUNCT__
5029 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5030 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5031 {
5032   PetscErrorCode ierr;
5033   PetscInt       m=A->rmap->n,n=B->cmap->n;
5034   Mat            Cmat;
5035 
5036   PetscFunctionBegin;
5037   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5038   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5039   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5040   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5041   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5042   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5043   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5044   *C   = Cmat;
5045   PetscFunctionReturn(0);
5046 }
5047 
5048 /* ----------------------------------------------------------------*/
5049 #undef __FUNCT__
5050 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5051 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5052 {
5053   PetscErrorCode ierr;
5054 
5055   PetscFunctionBegin;
5056   if (scall == MAT_INITIAL_MATRIX){
5057     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5058   }
5059   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5060   PetscFunctionReturn(0);
5061 }
5062 
5063 EXTERN_C_BEGIN
5064 #if defined(PETSC_HAVE_MUMPS)
5065 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5066 #endif
5067 #if defined(PETSC_HAVE_PASTIX)
5068 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5069 #endif
5070 #if defined(PETSC_HAVE_SUPERLU_DIST)
5071 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5072 #endif
5073 #if defined(PETSC_HAVE_SPOOLES)
5074 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5075 #endif
5076 EXTERN_C_END
5077 
5078 /*MC
5079    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5080 
5081    Options Database Keys:
5082 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5083 
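   Example usage (a minimal sketch; m, n, M, N, d_nz, and o_nz are illustrative placeholders):
.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
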
5084   Level: beginner
5085 
5086 .seealso: MatCreateMPIAIJ()
5087 M*/
5088 
5089 EXTERN_C_BEGIN
5090 #undef __FUNCT__
5091 #define __FUNCT__ "MatCreate_MPIAIJ"
5092 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5093 {
5094   Mat_MPIAIJ     *b;
5095   PetscErrorCode ierr;
5096   PetscMPIInt    size;
5097 
5098   PetscFunctionBegin;
5099   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5100 
5101   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5102   B->data         = (void*)b;
5103   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5104   B->rmap->bs     = 1;
5105   B->assembled    = PETSC_FALSE;
5106   B->mapping      = 0;
5107 
5108   B->insertmode   = NOT_SET_VALUES;
5109   b->size         = size;
5110   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5111 
5112   /* build cache for off array entries formed */
5113   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5114   b->donotstash  = PETSC_FALSE;
5115   b->colmap      = 0;
5116   b->garray      = 0;
5117   b->roworiented = PETSC_TRUE;
5118 
5119   /* stuff used for matrix vector multiply */
5120   b->lvec      = PETSC_NULL;
5121   b->Mvctx     = PETSC_NULL;
5122 
5123   /* stuff for MatGetRow() */
5124   b->rowindices   = 0;
5125   b->rowvalues    = 0;
5126   b->getrowactive = PETSC_FALSE;
5127 
5128 #if defined(PETSC_HAVE_SPOOLES)
5129   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5130                                      "MatGetFactor_mpiaij_spooles",
5131                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5132 #endif
5133 #if defined(PETSC_HAVE_MUMPS)
5134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5135                                      "MatGetFactor_aij_mumps",
5136                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5137 #endif
5138 #if defined(PETSC_HAVE_PASTIX)
5139   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5140 					   "MatGetFactor_mpiaij_pastix",
5141 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5142 #endif
5143 #if defined(PETSC_HAVE_SUPERLU_DIST)
5144   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5145                                      "MatGetFactor_mpiaij_superlu_dist",
5146                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5147 #endif
5148   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5149                                      "MatStoreValues_MPIAIJ",
5150                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5151   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5152                                      "MatRetrieveValues_MPIAIJ",
5153                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5155 				     "MatGetDiagonalBlock_MPIAIJ",
5156                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5157   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5158 				     "MatIsTranspose_MPIAIJ",
5159 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5160   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5161 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5162 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5163   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5164 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5165 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5166   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5167 				     "MatDiagonalScaleLocal_MPIAIJ",
5168 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5169   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5170                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5171                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5172   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5173                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5174                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5175   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5176                                      "MatConvert_MPIAIJ_MPISBAIJ",
5177                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5178   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5179                                      "MatMatMult_MPIDense_MPIAIJ",
5180                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5181   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5182                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5183                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5184   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5185                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5186                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5187   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5188   PetscFunctionReturn(0);
5189 }
5190 EXTERN_C_END
5191 
5192 #undef __FUNCT__
5193 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5194 /*@
5195      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5196          and "off-diagonal" part of the matrix in CSR format.
5197 
5198    Collective on MPI_Comm
5199 
5200    Input Parameters:
5201 +  comm - MPI communicator
5202 .  m - number of local rows (Cannot be PETSC_DECIDE)
5203 .  n - This value should be the same as the local size used in creating the
5204        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5205        calculated if N is given). For square matrices n is almost always m.
5206 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5207 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5208 .   i - row indices for "diagonal" portion of matrix
5209 .   j - column indices (local to the "diagonal" block, i.e. in the range [0,n))
5210 .   a - matrix values
5211 .   oi - row indices for "off-diagonal" portion of matrix
5212 .   oj - column indices (global)
5213 -   oa - matrix values
5214 
5215    Output Parameter:
5216 .   mat - the matrix
5217 
5218    Level: advanced
5219 
5220    Notes:
5221        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5222 
5223        The i and j indices are 0 based
5224 
5225        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5226 
5227        This sets local rows and cannot be used to set off-processor values.
5228 
5229        You cannot later use MatSetValues() to change values in this matrix.
5230 
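   Example usage (a minimal sketch on 2 processes, each owning 2 rows and 2 columns of
   the 4x4 matrix drawn below; only process 0's arrays are shown, and, per the
   MatCreateSeqAIJWithArrays() calls in this routine, the j indices are local to the
   "diagonal" block while the oj indices are global):
.vb
         1 2 | 0 3
         0 5 | 6 0     rows of process 0
         ---------
         0 4 | 5 0
         6 0 | 0 7     rows of process 1

      PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1}; PetscScalar a[]  = {1,2,5};
      PetscInt    oi[] = {0,1,2}, oj[] = {3,2};   PetscScalar oa[] = {3,6};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
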
5231 .keywords: matrix, aij, compressed row, sparse, parallel
5232 
5233 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5234           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5235 @*/
5236 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5237 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5238 {
5239   PetscErrorCode ierr;
5240   Mat_MPIAIJ     *maij;
5241 
5242   PetscFunctionBegin;
5243   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5244   if (i[0]) {
5245     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5246   }
5247   if (oi[0]) {
5248     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5249   }
5250   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5251   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5252   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5253   maij = (Mat_MPIAIJ*) (*mat)->data;
5254   maij->donotstash     = PETSC_TRUE;
5255   (*mat)->preallocated = PETSC_TRUE;
5256 
5257   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5258   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5259   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5260   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5261 
5262   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5263   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5264 
5265   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5266   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5267   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5268   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5269 
5270   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5271   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5272   PetscFunctionReturn(0);
5273 }
5274 
5275 /*
5276     Special version for direct calls from Fortran
5277 */
5278 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5279 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5280 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5281 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5282 #endif
5283 
5284 /* Change these macros so can be used in void function */
5285 #undef CHKERRQ
5286 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5287 #undef SETERRQ2
5288 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5289 #undef SETERRQ
5290 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5291 
5292 EXTERN_C_BEGIN
5293 #undef __FUNCT__
5294 #define __FUNCT__ "matsetvaluesmpiaij_"
5295 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5296 {
5297   Mat             mat = *mmat;
5298   PetscInt        m = *mm, n = *mn;
5299   InsertMode      addv = *maddv;
5300   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5301   PetscScalar     value;
5302   PetscErrorCode  ierr;
5303 
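  /* Make sure the matrix is preallocated and that add and insert modes are not mixed within one assembly */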
5304   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5305   if (mat->insertmode == NOT_SET_VALUES) {
5306     mat->insertmode = addv;
5307   }
5308 #if defined(PETSC_USE_DEBUG)
5309   else if (mat->insertmode != addv) {
5310     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5311   }
5312 #endif
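  /* Open a new scope so the working variables needed by the MatSetValues_SeqAIJ_A_Private()
     and MatSetValues_SeqAIJ_B_Private() macros can be declared after the statements above */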
5313   {
5314   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5315   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5316   PetscBool       roworiented = aij->roworiented;
5317 
5318   /* Some Variables required in the macro */
5319   Mat             A = aij->A;
5320   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5321   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5322   MatScalar       *aa = a->a;
5323   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5324   Mat             B = aij->B;
5325   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5326   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5327   MatScalar       *ba = b->a;
5328 
5329   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5330   PetscInt        nonew = a->nonew;
5331   MatScalar       *ap1,*ap2;
5332 
5333   PetscFunctionBegin;
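  /* As in MatSetValues_MPIAIJ(): values in locally owned rows go directly into the diagonal
     block A or the off-diagonal block B; rows owned by other processes are stashed below for
     communication at assembly time (unless donotstash is set) */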
5334   for (i=0; i<m; i++) {
5335     if (im[i] < 0) continue;
5336 #if defined(PETSC_USE_DEBUG)
5337     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5338 #endif
5339     if (im[i] >= rstart && im[i] < rend) {
5340       row      = im[i] - rstart;
5341       lastcol1 = -1;
5342       rp1      = aj + ai[row];
5343       ap1      = aa + ai[row];
5344       rmax1    = aimax[row];
5345       nrow1    = ailen[row];
5346       low1     = 0;
5347       high1    = nrow1;
5348       lastcol2 = -1;
5349       rp2      = bj + bi[row];
5350       ap2      = ba + bi[row];
5351       rmax2    = bimax[row];
5352       nrow2    = bilen[row];
5353       low2     = 0;
5354       high2    = nrow2;
5355 
5356       for (j=0; j<n; j++) {
5357         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5358         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5359         if (in[j] >= cstart && in[j] < cend){
5360           col = in[j] - cstart;
5361           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5362         } else if (in[j] < 0) continue;
5363 #if defined(PETSC_USE_DEBUG)
5364         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5365 #endif
5366         else {
5367           if (mat->was_assembled) {
5368             if (!aij->colmap) {
5369               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5370             }
5371 #if defined (PETSC_USE_CTABLE)
5372             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5373             col--;
5374 #else
5375             col = aij->colmap[in[j]] - 1;
5376 #endif
5377             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5378               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5379               col =  in[j];
5380               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5381               B = aij->B;
5382               b = (Mat_SeqAIJ*)B->data;
5383               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5384               ba       = b->a;  /* DisAssemble_MPIAIJ() replaced B, so refresh ba before computing ap2 */
5385               rp2      = bj + bi[row];
5386               ap2      = ba + bi[row];
5387               rmax2    = bimax[row];
5388               nrow2    = bilen[row];
5389               low2     = 0;
5390               high2    = nrow2;
5391               bm       = aij->B->rmap->n;
5392             }
5393           } else col = in[j];
5394           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5395         }
5396       }
5397     } else {
5398       if (!aij->donotstash) {
5399         if (roworiented) {
5400           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5401         } else {
5402           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5403         }
5404       }
5405     }
5406   }}
5407   PetscFunctionReturnVoid();
5408 }
5409 EXTERN_C_END
5410 
5411