xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d8a68f868e1e4be81235540c3d88957f1cb144c1)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
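
/*
   A hedged usage sketch (not PETSc API documentation): MatDistribute_MPIAIJ() is an
   internal helper; gmat need only be a valid SeqAIJ matrix on process 0, and m is the
   number of rows each process is to own (the m's must sum to the global size).

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ... change the numerical values of gmat on process 0, keeping its nonzero pattern ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/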
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each process
181 has an order-N integer array, but access is fast).
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
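
/*
   A small illustrative sketch (mirroring the lookups performed later in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()) of how the colmap built above maps a
   global column gcol to a local column of the off-diagonal block B; entries are stored
   shifted by one so that zero can mean "not present":

   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;
   #else
      col = aij->colmap[gcol] - 1;
   #endif
      if (col < 0) { ... gcol does not occur in this process's off-diagonal block ... }
*/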
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
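
/*
   Both macros above follow the same insertion strategy; a standalone sketch (with
   hypothetical names rp/ap for one row's sorted column indices and values):

      low = 0; high = nrow;
      while (high - low > 5) {            bisection while the bracket is large
        t = (low + high)/2;
        if (rp[t] > col) high = t; else low = t;
      }
      for (i=low; i<high; i++) {          short linear scan to finish
        if (rp[i] > col) break;           col not present; i is the insertion point
        if (rp[i] == col) { add to or overwrite ap[i]; done }
      }
      shift rp[i..nrow-1], ap[i..nrow-1] up one slot and store col, value at position i
*/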
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
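
/*
   A sketch of the layout MatSetValuesRow_MPIAIJ() above assumes for v[]: the whole row
   in ascending global column order, so it splits into three contiguous pieces

      [ B entries with global column < diag | all A (diagonal block) entries | remaining B entries ]

   and the three PetscMemcpy() calls copy exactly those pieces.
*/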
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some variables required by the macros above */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
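
/*
   A hedged sketch of the user-level calling sequence that lands in MatSetValues_MPIAIJ()
   above; entries for rows owned by another process are stashed here and delivered during
   assembly (A is assumed to be a preallocated MPIAIJ matrix):

      PetscInt    row = 0,cols[2];
      PetscScalar vals[2];
      cols[0] = 0; cols[1] = 5; vals[0] = 2.0; vals[1] = -1.0;
      ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/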
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled, if so we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled thus we can skip this stuff
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscBool      found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   if (A->nooffproczerorows) {
606     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
607     nrecvs = nsends;
608     nmax   = N;
609   } else {
610     /* inform other processors of the number of messages and the max length */
611     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
612   }
613 
614   /* post receives:   */
615   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
616   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
617   for (i=0; i<nrecvs; i++) {
618     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
619   }
620 
621   /* do sends:
622       1) starts[i] gives the starting index in svalues for stuff going to
623          the ith processor
624   */
625   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
626   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
627   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
628   starts[0] = 0;
629   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   for (i=0; i<N; i++) {
631     svalues[starts[owner[i]]++] = rows[i];
632   }
633 
634   starts[0] = 0;
635   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
636   count = 0;
637   for (i=0; i<size; i++) {
638     if (nprocs[2*i+1]) {
639       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
640     }
641   }
642   ierr = PetscFree(starts);CHKERRQ(ierr);
643 
644   base = owners[rank];
645 
646   /*  wait on receives */
647   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
648   count  = nrecvs; slen = 0;
649   while (count) {
650     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
651     /* unpack receives into our local space */
652     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
653     source[imdex]  = recv_status.MPI_SOURCE;
654     lens[imdex]    = n;
655     slen          += n;
656     count--;
657   }
658   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
659 
660   /* move the data into the send scatter */
661   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
662   count = 0;
663   for (i=0; i<nrecvs; i++) {
664     values = rvalues + i*nmax;
665     for (j=0; j<lens[i]; j++) {
666       lrows[count++] = values[j] - base;
667     }
668   }
669   ierr = PetscFree(rvalues);CHKERRQ(ierr);
670   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
671   ierr = PetscFree(owner);CHKERRQ(ierr);
672   ierr = PetscFree(nprocs);CHKERRQ(ierr);
673 
674   /* actually zap the local rows */
675   /*
676         Zero the required rows. If the "diagonal block" of the matrix
677      is square and the user wishes to set the diagonal we use separate
678      code so that MatSetValues() is not called for each diagonal entry, which would
679      allocate new memory, triggering many mallocs and slowing things down.
680 
681   */
682   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
683   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
684   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
685     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
686   } else if (diag != 0.0) {
687     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
688     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
689       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
691     }
692     for (i = 0; i < slen; i++) {
693       row  = lrows[i] + rstart;
694       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
695     }
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698   } else {
699     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
700   }
701   ierr = PetscFree(lrows);CHKERRQ(ierr);
702 
703   /* wait on sends */
704   if (nsends) {
705     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
706     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
707     ierr = PetscFree(send_status);CHKERRQ(ierr);
708   }
709   ierr = PetscFree(send_waits);CHKERRQ(ierr);
710   ierr = PetscFree(svalues);CHKERRQ(ierr);
711 
712   PetscFunctionReturn(0);
713 }
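
/*
   A hedged usage sketch: the rows passed to MatZeroRows() may be owned by any process;
   the routine above routes each index to its owner before zeroing, so for example

      PetscInt rows[2];
      rows[0] = 0; rows[1] = 7;
      ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);

   zeroes global rows 0 and 7 and places 1.0 on their diagonal entries.
*/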
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatMult_MPIAIJ"
717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721   PetscInt       nt;
722 
723   PetscFunctionBegin;
724   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
725   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
726   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
728   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
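
/*
   The multiply above overlaps communication with computation; schematically (lvec holds
   the ghost entries of xx gathered from other processes):

      VecScatterBegin(Mvctx,xx,lvec,...);      start gathering off-process entries
      yy  = A_diag * xx;                       local diagonal-block product meanwhile
      VecScatterEnd(Mvctx,xx,lvec,...);        ghost values have now arrived
      yy += B_off * lvec;                      add the off-diagonal contribution
*/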
732 
733 #undef __FUNCT__
734 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
735 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
736 {
737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
738   PetscErrorCode ierr;
739 
740   PetscFunctionBegin;
741   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatMultAdd_MPIAIJ"
747 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
748 {
749   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
755   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
762 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
763 {
764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
765   PetscErrorCode ierr;
766   PetscBool      merged;
767 
768   PetscFunctionBegin;
769   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
770   /* do nondiagonal part */
771   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
772   if (!merged) {
773     /* send it on its way */
774     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
775     /* do local part */
776     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
777     /* receive remote parts: note this assumes the values are not actually */
778     /* added into yy until the next line */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   } else {
781     /* do local part */
782     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
783     /* send it on its way */
784     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785     /* values actually were received in the Begin() but we need to call this nop */
786     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 EXTERN_C_BEGIN
792 #undef __FUNCT__
793 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
794 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
795 {
796   MPI_Comm       comm;
797   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
798   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
799   IS             Me,Notme;
800   PetscErrorCode ierr;
801   PetscInt       M,N,first,last,*notme,i;
802   PetscMPIInt    size;
803 
804   PetscFunctionBegin;
805 
806   /* Easy test: symmetric diagonal block */
807   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
808   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
809   if (!*f) PetscFunctionReturn(0);
810   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
812   if (size == 1) PetscFunctionReturn(0);
813 
814   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
815   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
816   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
817   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
818   for (i=0; i<first; i++) notme[i] = i;
819   for (i=last; i<M; i++) notme[i-last+first] = i;
820   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
821   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
822   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
823   Aoff = Aoffs[0];
824   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
825   Boff = Boffs[0];
826   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
828   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
829   ierr = ISDestroy(Me);CHKERRQ(ierr);
830   ierr = ISDestroy(Notme);CHKERRQ(ierr);
831 
832   PetscFunctionReturn(0);
833 }
834 EXTERN_C_END
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
838 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
839 {
840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode ierr;
842 
843   PetscFunctionBegin;
844   /* do nondiagonal part */
845   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
846   /* send it on its way */
847   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
848   /* do local part */
849   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
850   /* receive remote parts */
851   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 /*
856   This only works correctly for square matrices where the subblock A->A is the
857    diagonal block
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
861 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
862 {
863   PetscErrorCode ierr;
864   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
865 
866   PetscFunctionBegin;
867   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
868   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
900   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_CLASSID;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
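
/*
   A summary of the binary layout written above (the layout MatLoad() for AIJ matrices
   expects to read back):

      PetscInt    header[4];     MAT_FILE_CLASSID, global rows M, global cols N, total nonzeros
      PetscInt    rowlen[M];     number of nonzeros in each global row
      PetscInt    colidx[nz];    global column indices, row by row
      PetscScalar vals[nz];      nonzero values in the same order
*/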
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscBool         isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscBool  inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscBool  isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscBool  flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscBool      iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscBool      hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
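
/*
   A minimal usage sketch for the SOR kernel above: only the processor-local
   sweeps (SOR_LOCAL_FORWARD/BACKWARD/SYMMETRIC_SWEEP), SOR_APPLY_UPPER and
   SOR_EISENSTAT are handled; a true parallel SOR is not supported.  Assuming
   a hypothetical assembled MPIAIJ matrix A with conforming vectors b and x:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   In practice this path is usually reached through PCSOR or PCEISENSTAT
   rather than by calling MatSOR() directly.
*/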
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
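
/*
   A usage sketch for the permutation routine above, assuming a hypothetical
   MPIAIJ matrix A and index sets rowperm and colperm that describe row and
   column permutations (the routine gathers the row permutation globally, so
   rowperm may live on the matrix communicator or on PETSC_COMM_SELF):

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
*/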
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
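
/*
   A usage sketch for the statistics gathered above: MAT_LOCAL reports the
   on-process counts, MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce them over the
   matrix communicator.  Assuming a hypothetical assembled matrix A:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/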
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SPD:
1449     A->spd_set                         = PETSC_TRUE;
1450     A->spd                             = flg;
1451     if (flg) {
1452       A->symmetric                     = PETSC_TRUE;
1453       A->structurally_symmetric        = PETSC_TRUE;
1454       A->symmetric_set                 = PETSC_TRUE;
1455       A->structurally_symmetric_set    = PETSC_TRUE;
1456     }
1457     break;
1458   case MAT_SYMMETRIC:
1459     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1460     break;
1461   case MAT_STRUCTURALLY_SYMMETRIC:
1462     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1463     break;
1464   case MAT_HERMITIAN:
1465     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1466     break;
1467   case MAT_SYMMETRY_ETERNAL:
1468     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1469     break;
1470   default:
1471     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1472   }
1473   PetscFunctionReturn(0);
1474 }
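
/*
   Most options above are simply forwarded to the diagonal (a->A) and
   off-diagonal (a->B) sequential blocks.  A typical use, assuming a
   hypothetical matrix A whose nonzero pattern should survive MatZeroEntries():

     ierr = MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatZeroEntries(A);CHKERRQ(ierr);
*/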
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatGetRow_MPIAIJ"
1478 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1482   PetscErrorCode ierr;
1483   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1484   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1485   PetscInt       *cmap,*idx_p;
1486 
1487   PetscFunctionBegin;
1488   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1489   mat->getrowactive = PETSC_TRUE;
1490 
1491   if (!mat->rowvalues && (idx || v)) {
1492     /*
1493         allocate enough space to hold information from the longest row.
1494     */
1495     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1496     PetscInt   max = 1,tmp;
1497     for (i=0; i<matin->rmap->n; i++) {
1498       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1499       if (max < tmp) { max = tmp; }
1500     }
1501     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1502   }
1503 
1504   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1505   lrow = row - rstart;
1506 
1507   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1508   if (!v)   {pvA = 0; pvB = 0;}
1509   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1510   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1511   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1512   nztot = nzA + nzB;
1513 
1514   cmap  = mat->garray;
1515   if (v  || idx) {
1516     if (nztot) {
1517       /* Sort by increasing column numbers, assuming A and B already sorted */
1518       PetscInt imark = -1;
1519       if (v) {
1520         *v = v_p = mat->rowvalues;
1521         for (i=0; i<nzB; i++) {
1522           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1523           else break;
1524         }
1525         imark = i;
1526         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1527         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1528       }
1529       if (idx) {
1530         *idx = idx_p = mat->rowindices;
1531         if (imark > -1) {
1532           for (i=0; i<imark; i++) {
1533             idx_p[i] = cmap[cworkB[i]];
1534           }
1535         } else {
1536           for (i=0; i<nzB; i++) {
1537             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1538             else break;
1539           }
1540           imark = i;
1541         }
1542         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1543         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1544       }
1545     } else {
1546       if (idx) *idx = 0;
1547       if (v)   *v   = 0;
1548     }
1549   }
1550   *nz = nztot;
1551   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1552   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1558 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1559 {
1560   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1561 
1562   PetscFunctionBegin;
1563   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1564   aij->getrowactive = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
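
/*
   A usage sketch for the MatGetRow()/MatRestoreRow() pair above: only locally
   owned rows may be requested, and each MatGetRow() must be matched by a
   MatRestoreRow() before another row is accessed.  Assuming a hypothetical
   parallel matrix A:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... read cols[0..ncols-1] and vals[0..ncols-1] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/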
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatNorm_MPIAIJ"
1570 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1571 {
1572   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1573   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1574   PetscErrorCode ierr;
1575   PetscInt       i,j,cstart = mat->cmap->rstart;
1576   PetscReal      sum = 0.0;
1577   MatScalar      *v;
1578 
1579   PetscFunctionBegin;
1580   if (aij->size == 1) {
1581     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1582   } else {
1583     if (type == NORM_FROBENIUS) {
1584       v = amat->a;
1585       for (i=0; i<amat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       v = bmat->a;
1593       for (i=0; i<bmat->nz; i++) {
1594 #if defined(PETSC_USE_COMPLEX)
1595         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1596 #else
1597         sum += (*v)*(*v); v++;
1598 #endif
1599       }
1600       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       *norm = sqrt(*norm);
1602     } else if (type == NORM_1) { /* max column norm */
1603       PetscReal *tmp,*tmp2;
1604       PetscInt  *jj,*garray = aij->garray;
1605       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1606       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1607       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1608       *norm = 0.0;
1609       v = amat->a; jj = amat->j;
1610       for (j=0; j<amat->nz; j++) {
1611         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1612       }
1613       v = bmat->a; jj = bmat->j;
1614       for (j=0; j<bmat->nz; j++) {
1615         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1616       }
1617       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1618       for (j=0; j<mat->cmap->N; j++) {
1619         if (tmp2[j] > *norm) *norm = tmp2[j];
1620       }
1621       ierr = PetscFree(tmp);CHKERRQ(ierr);
1622       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1623     } else if (type == NORM_INFINITY) { /* max row norm */
1624       PetscReal ntemp = 0.0;
1625       for (j=0; j<aij->A->rmap->n; j++) {
1626         v = amat->a + amat->i[j];
1627         sum = 0.0;
1628         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1629           sum += PetscAbsScalar(*v); v++;
1630         }
1631         v = bmat->a + bmat->i[j];
1632         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1633           sum += PetscAbsScalar(*v); v++;
1634         }
1635         if (sum > ntemp) ntemp = sum;
1636       }
1637       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1638     } else {
1639       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1640     }
1641   }
1642   PetscFunctionReturn(0);
1643 }
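
/*
   A usage sketch for the norms implemented above; NORM_1, NORM_FROBENIUS and
   NORM_INFINITY are supported, the 2-norm is not.  Assuming a hypothetical
   assembled matrix A:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/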
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatTranspose_MPIAIJ"
1647 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1648 {
1649   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1650   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1651   PetscErrorCode ierr;
1652   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1653   PetscInt       cstart=A->cmap->rstart,ncol;
1654   Mat            B;
1655   MatScalar      *array;
1656 
1657   PetscFunctionBegin;
1658   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1659 
1660   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1661   ai = Aloc->i; aj = Aloc->j;
1662   bi = Bloc->i; bj = Bloc->j;
1663   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1664     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1665     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1666     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1667     for (i=0; i<ai[ma]; i++){
1668       d_nnz[aj[i]] ++;
1669       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1670     }
1671 
1672     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1673     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1674     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1675     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1676     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1677   } else {
1678     B = *matout;
1679   }
1680 
1681   /* copy over the A part */
1682   array = Aloc->a;
1683   row = A->rmap->rstart;
1684   for (i=0; i<ma; i++) {
1685     ncol = ai[i+1]-ai[i];
1686     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1687     row++; array += ncol; aj += ncol;
1688   }
1689   aj = Aloc->j;
1690   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1691 
1692   /* copy over the B part */
1693   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1694   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1695   array = Bloc->a;
1696   row = A->rmap->rstart;
1697   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1698   cols_tmp = cols;
1699   for (i=0; i<mb; i++) {
1700     ncol = bi[i+1]-bi[i];
1701     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1702     row++; array += ncol; cols_tmp += ncol;
1703   }
1704   ierr = PetscFree(cols);CHKERRQ(ierr);
1705 
1706   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1708   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1709     *matout = B;
1710   } else {
1711     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1712   }
1713   PetscFunctionReturn(0);
1714 }
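
/*
   A usage sketch for the transpose above, assuming a hypothetical matrix A.
   MAT_INITIAL_MATRIX creates a new matrix; the in-place form passes the
   matrix itself and is restricted to square matrices:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/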
1715 
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1718 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1719 {
1720   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1721   Mat            a = aij->A,b = aij->B;
1722   PetscErrorCode ierr;
1723   PetscInt       s1,s2,s3;
1724 
1725   PetscFunctionBegin;
1726   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1727   if (rr) {
1728     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1729     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1730     /* Overlap communication with computation. */
1731     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732   }
1733   if (ll) {
1734     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1735     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1736     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1737   }
1738   /* scale  the diagonal block */
1739   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1740 
1741   if (rr) {
1742     /* Do a scatter end and then right scale the off-diagonal block */
1743     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1744     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1745   }
1746 
1747   PetscFunctionReturn(0);
1748 }
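
/*
   A usage sketch for the scaling above, which forms diag(l)*A*diag(r) with
   either vector optionally PETSC_NULL.  Assuming a hypothetical matrix A and
   conforming vectors l (row layout) and r (column layout):

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);
*/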
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1752 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1753 {
1754   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1759   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1760   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1761   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1766 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1767 {
1768   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatEqual_MPIAIJ"
1778 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1779 {
1780   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1781   Mat            a,b,c,d;
1782   PetscBool      flg;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   a = matA->A; b = matA->B;
1787   c = matB->A; d = matB->B;
1788 
1789   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1790   if (flg) {
1791     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1792   }
1793   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatCopy_MPIAIJ"
1799 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1800 {
1801   PetscErrorCode ierr;
1802   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1803   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1804 
1805   PetscFunctionBegin;
1806   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1807   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1808     /* because of the column compression in the off-processor part of the matrix a->B,
1809        the number of columns in a->B and b->B may be different, hence we cannot call
1810        the MatCopy() directly on the two parts. If need be, we can provide a more
1811        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1812        then copying the submatrices */
1813     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1814   } else {
1815     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1816     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1823 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1824 {
1825   PetscErrorCode ierr;
1826 
1827   PetscFunctionBegin;
1828   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatAXPY_MPIAIJ"
1834 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       i;
1838   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1839   PetscBLASInt   bnz,one=1;
1840   Mat_SeqAIJ     *x,*y;
1841 
1842   PetscFunctionBegin;
1843   if (str == SAME_NONZERO_PATTERN) {
1844     PetscScalar alpha = a;
1845     x = (Mat_SeqAIJ *)xx->A->data;
1846     y = (Mat_SeqAIJ *)yy->A->data;
1847     bnz = PetscBLASIntCast(x->nz);
1848     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     bnz = PetscBLASIntCast(x->nz);
1852     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1853   } else if (str == SUBSET_NONZERO_PATTERN) {
1854     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1855 
1856     x = (Mat_SeqAIJ *)xx->B->data;
1857     y = (Mat_SeqAIJ *)yy->B->data;
1858     if (y->xtoy && y->XtoY != xx->B) {
1859       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1860       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1861     }
1862     if (!y->xtoy) { /* get xtoy */
1863       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1864       y->XtoY = xx->B;
1865       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1866     }
1867     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1868   } else {
1869     Mat B;
1870     PetscInt *nnz_d,*nnz_o;
1871     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
1872     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
1873     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
1874     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1875     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
1876     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1877     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
1878     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
1879     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1880     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1881     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1882     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1883   }
1884   PetscFunctionReturn(0);
1885 }
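
/*
   A usage sketch for Y = Y + a*X above.  The MatStructure argument selects
   the fast paths: SAME_NONZERO_PATTERN applies a BLAS axpy to the stored
   values, SUBSET_NONZERO_PATTERN builds and caches an index map from X into
   Y, and any other value reallocates Y with merged preallocation.  Assuming
   hypothetical conforming matrices X and Y:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/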
1886 
1887 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatConjugate_MPIAIJ"
1891 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1892 {
1893 #if defined(PETSC_USE_COMPLEX)
1894   PetscErrorCode ierr;
1895   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1896 
1897   PetscFunctionBegin;
1898   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1899   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1900 #else
1901   PetscFunctionBegin;
1902 #endif
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatRealPart_MPIAIJ"
1908 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1909 {
1910   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1911   PetscErrorCode ierr;
1912 
1913   PetscFunctionBegin;
1914   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1915   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1921 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1922 {
1923   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1928   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #ifdef PETSC_HAVE_PBGL
1933 
1934 #include <boost/parallel/mpi/bsp_process_group.hpp>
1935 #include <boost/graph/distributed/ilu_default_graph.hpp>
1936 #include <boost/graph/distributed/ilu_0_block.hpp>
1937 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1938 #include <boost/graph/distributed/petsc/interface.hpp>
1939 #include <boost/multi_array.hpp>
1940 #include <boost/parallel/distributed_property_map.hpp>
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1944 /*
1945   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1946 */
1947 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1948 {
1949   namespace petsc = boost::distributed::petsc;
1950 
1951   namespace graph_dist = boost::graph::distributed;
1952   using boost::graph::distributed::ilu_default::process_group_type;
1953   using boost::graph::ilu_permuted;
1954 
1955   PetscBool       row_identity, col_identity;
1956   PetscContainer  c;
1957   PetscInt        m, n, M, N;
1958   PetscErrorCode  ierr;
1959 
1960   PetscFunctionBegin;
1961   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1962   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1963   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1964   if (!row_identity || !col_identity) {
1965     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1966   }
1967 
1968   process_group_type pg;
1969   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1970   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1971   lgraph_type&   level_graph = *lgraph_p;
1972   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1973 
1974   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1975   ilu_permuted(level_graph);
1976 
1977   /* put together the new matrix */
1978   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1979   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1980   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1981   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1982   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1983   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1985 
1986   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1987   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1988   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 #undef __FUNCT__
1993 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1994 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1995 {
1996   PetscFunctionBegin;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatSolve_MPIAIJ"
2002 /*
2003   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2004 */
2005 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2006 {
2007   namespace graph_dist = boost::graph::distributed;
2008 
2009   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2010   lgraph_type*   lgraph_p;
2011   PetscContainer c;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2016   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2017   ierr = VecCopy(b, x);CHKERRQ(ierr);
2018 
2019   PetscScalar* array_x;
2020   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2021   PetscInt sx;
2022   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2023 
2024   PetscScalar* array_b;
2025   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2026   PetscInt sb;
2027   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2028 
2029   lgraph_type&   level_graph = *lgraph_p;
2030   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2031 
2032   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2033   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2034                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2035 
2036   typedef boost::iterator_property_map<array_ref_type::iterator,
2037                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2038   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2039                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2040 
2041   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2042 
2043   PetscFunctionReturn(0);
2044 }
2045 #endif
2046 
2047 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2048   PetscInt       nzlocal,nsends,nrecvs;
2049   PetscMPIInt    *send_rank,*recv_rank;
2050   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2051   PetscScalar    *sbuf_a,**rbuf_a;
2052   PetscErrorCode (*MatDestroy)(Mat);
2053 } Mat_Redundant;
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2057 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2058 {
2059   PetscErrorCode       ierr;
2060   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2061   PetscInt             i;
2062 
2063   PetscFunctionBegin;
2064   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2065   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2066   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2067   for (i=0; i<redund->nrecvs; i++){
2068     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2069     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2070   }
2071   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2072   ierr = PetscFree(redund);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatDestroy_MatRedundant"
2078 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2079 {
2080   PetscErrorCode  ierr;
2081   PetscContainer  container;
2082   Mat_Redundant   *redund=PETSC_NULL;
2083 
2084   PetscFunctionBegin;
2085   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2086   if (container) {
2087     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2088   } else {
2089     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2090   }
2091   A->ops->destroy = redund->MatDestroy;
2092   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2093   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2094   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2100 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2101 {
2102   PetscMPIInt    rank,size;
2103   MPI_Comm       comm=((PetscObject)mat)->comm;
2104   PetscErrorCode ierr;
2105   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2106   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2107   PetscInt       *rowrange=mat->rmap->range;
2108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2109   Mat            A=aij->A,B=aij->B,C=*matredundant;
2110   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2111   PetscScalar    *sbuf_a;
2112   PetscInt       nzlocal=a->nz+b->nz;
2113   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2114   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2115   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2116   MatScalar      *aworkA,*aworkB;
2117   PetscScalar    *vals;
2118   PetscMPIInt    tag1,tag2,tag3,imdex;
2119   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2120                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2121   MPI_Status     recv_status,*send_status;
2122   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2123   PetscInt       **rbuf_j=PETSC_NULL;
2124   PetscScalar    **rbuf_a=PETSC_NULL;
2125   Mat_Redundant  *redund=PETSC_NULL;
2126   PetscContainer container;
2127 
2128   PetscFunctionBegin;
2129   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2130   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2131 
2132   if (reuse == MAT_REUSE_MATRIX) {
2133     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2134     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2135     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2136     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2137     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2138     if (container) {
2139       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2140     } else {
2141       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2142     }
2143     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2144 
2145     nsends    = redund->nsends;
2146     nrecvs    = redund->nrecvs;
2147     send_rank = redund->send_rank;
2148     recv_rank = redund->recv_rank;
2149     sbuf_nz   = redund->sbuf_nz;
2150     rbuf_nz   = redund->rbuf_nz;
2151     sbuf_j    = redund->sbuf_j;
2152     sbuf_a    = redund->sbuf_a;
2153     rbuf_j    = redund->rbuf_j;
2154     rbuf_a    = redund->rbuf_a;
2155   }
2156 
2157   if (reuse == MAT_INITIAL_MATRIX){
2158     PetscMPIInt  subrank,subsize;
2159     PetscInt     nleftover,np_subcomm;
2160     /* get the destination processors' id send_rank, nsends and nrecvs */
2161     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2162     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2163     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2164     np_subcomm = size/nsubcomm;
2165     nleftover  = size - nsubcomm*np_subcomm;
2166     nsends = 0; nrecvs = 0;
2167     for (i=0; i<size; i++){ /* i=rank*/
2168       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2169         send_rank[nsends] = i; nsends++;
2170         recv_rank[nrecvs++] = i;
2171       }
2172     }
2173     if (rank >= size - nleftover){/* this proc is a leftover processor */
2174       i = size-nleftover-1;
2175       j = 0;
2176       while (j < nsubcomm - nleftover){
2177         send_rank[nsends++] = i;
2178         i--; j++;
2179       }
2180     }
2181 
2182     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2183       for (i=0; i<nleftover; i++){
2184         recv_rank[nrecvs++] = size-nleftover+i;
2185       }
2186     }
2187 
2188     /* allocate sbuf_j, sbuf_a */
2189     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2190     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2191     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2192   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2193 
2194   /* copy mat's local entries into the buffers */
2195   if (reuse == MAT_INITIAL_MATRIX){
2196     rownz_max = 0;
2197     rptr = sbuf_j;
2198     cols = sbuf_j + rend-rstart + 1;
2199     vals = sbuf_a;
2200     rptr[0] = 0;
2201     for (i=0; i<rend-rstart; i++){
2202       row = i + rstart;
2203       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2204       ncols  = nzA + nzB;
2205       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2206       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2207       /* load the column indices for this row into cols */
2208       lwrite = 0;
2209       for (l=0; l<nzB; l++) {
2210         if ((ctmp = bmap[cworkB[l]]) < cstart){
2211           vals[lwrite]   = aworkB[l];
2212           cols[lwrite++] = ctmp;
2213         }
2214       }
2215       for (l=0; l<nzA; l++){
2216         vals[lwrite]   = aworkA[l];
2217         cols[lwrite++] = cstart + cworkA[l];
2218       }
2219       for (l=0; l<nzB; l++) {
2220         if ((ctmp = bmap[cworkB[l]]) >= cend){
2221           vals[lwrite]   = aworkB[l];
2222           cols[lwrite++] = ctmp;
2223         }
2224       }
2225       vals += ncols;
2226       cols += ncols;
2227       rptr[i+1] = rptr[i] + ncols;
2228       if (rownz_max < ncols) rownz_max = ncols;
2229     }
2230     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2231   } else { /* only copy matrix values into sbuf_a */
2232     rptr = sbuf_j;
2233     vals = sbuf_a;
2234     rptr[0] = 0;
2235     for (i=0; i<rend-rstart; i++){
2236       row = i + rstart;
2237       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2238       ncols  = nzA + nzB;
2239       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2240       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2241       lwrite = 0;
2242       for (l=0; l<nzB; l++) {
2243         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2244       }
2245       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2246       for (l=0; l<nzB; l++) {
2247         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2248       }
2249       vals += ncols;
2250       rptr[i+1] = rptr[i] + ncols;
2251     }
2252   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2253 
2254   /* send nzlocal to others, and recv other's nzlocal */
2255   /*--------------------------------------------------*/
2256   if (reuse == MAT_INITIAL_MATRIX){
2257     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2258     s_waits2 = s_waits3 + nsends;
2259     s_waits1 = s_waits2 + nsends;
2260     r_waits1 = s_waits1 + nsends;
2261     r_waits2 = r_waits1 + nrecvs;
2262     r_waits3 = r_waits2 + nrecvs;
2263   } else {
2264     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2265     r_waits3 = s_waits3 + nsends;
2266   }
2267 
2268   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2269   if (reuse == MAT_INITIAL_MATRIX){
2270     /* get new tags to keep the communication clean */
2271     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2272     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2273     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2274 
2275     /* post receives of other's nzlocal */
2276     for (i=0; i<nrecvs; i++){
2277       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2278     }
2279     /* send nzlocal to others */
2280     for (i=0; i<nsends; i++){
2281       sbuf_nz[i] = nzlocal;
2282       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2283     }
2284     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2285     count = nrecvs;
2286     while (count) {
2287       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2288       recv_rank[imdex] = recv_status.MPI_SOURCE;
2289       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2290       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2291 
2292       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2293       rbuf_nz[imdex] += i + 2;
2294       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2295       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2296       count--;
2297     }
2298     /* wait on sends of nzlocal */
2299     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2300     /* send mat->i,j to others, and recv from other's */
2301     /*------------------------------------------------*/
2302     for (i=0; i<nsends; i++){
2303       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2304       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2305     }
2306     /* wait on receives of mat->i,j */
2307     /*------------------------------*/
2308     count = nrecvs;
2309     while (count) {
2310       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2311       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2312       count--;
2313     }
2314     /* wait on sends of mat->i,j */
2315     /*---------------------------*/
2316     if (nsends) {
2317       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2318     }
2319   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2320 
2321   /* post receives, send and receive mat->a */
2322   /*----------------------------------------*/
2323   for (imdex=0; imdex<nrecvs; imdex++) {
2324     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2325   }
2326   for (i=0; i<nsends; i++){
2327     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2328   }
2329   count = nrecvs;
2330   while (count) {
2331     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2332     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2333     count--;
2334   }
2335   if (nsends) {
2336     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2337   }
2338 
2339   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2340 
2341   /* create redundant matrix */
2342   /*-------------------------*/
2343   if (reuse == MAT_INITIAL_MATRIX){
2344     /* compute rownz_max for preallocation */
2345     for (imdex=0; imdex<nrecvs; imdex++){
2346       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2347       rptr = rbuf_j[imdex];
2348       for (i=0; i<j; i++){
2349         ncols = rptr[i+1] - rptr[i];
2350         if (rownz_max < ncols) rownz_max = ncols;
2351       }
2352     }
2353 
2354     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2355     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2356     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2357     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2358     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2359   } else {
2360     C = *matredundant;
2361   }
2362 
2363   /* insert local matrix entries */
2364   rptr = sbuf_j;
2365   cols = sbuf_j + rend-rstart + 1;
2366   vals = sbuf_a;
2367   for (i=0; i<rend-rstart; i++){
2368     row   = i + rstart;
2369     ncols = rptr[i+1] - rptr[i];
2370     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2371     vals += ncols;
2372     cols += ncols;
2373   }
2374   /* insert received matrix entries */
2375   for (imdex=0; imdex<nrecvs; imdex++){
2376     rstart = rowrange[recv_rank[imdex]];
2377     rend   = rowrange[recv_rank[imdex]+1];
2378     rptr = rbuf_j[imdex];
2379     cols = rbuf_j[imdex] + rend-rstart + 1;
2380     vals = rbuf_a[imdex];
2381     for (i=0; i<rend-rstart; i++){
2382       row   = i + rstart;
2383       ncols = rptr[i+1] - rptr[i];
2384       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2385       vals += ncols;
2386       cols += ncols;
2387     }
2388   }
2389   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2392   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2393   if (reuse == MAT_INITIAL_MATRIX){
2394     PetscContainer container;
2395     *matredundant = C;
2396     /* create a supporting struct and attach it to C for reuse */
2397     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2398     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2399     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2400     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2401     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2402 
2403     redund->nzlocal = nzlocal;
2404     redund->nsends  = nsends;
2405     redund->nrecvs  = nrecvs;
2406     redund->send_rank = send_rank;
2407     redund->recv_rank = recv_rank;
2408     redund->sbuf_nz = sbuf_nz;
2409     redund->rbuf_nz = rbuf_nz;
2410     redund->sbuf_j  = sbuf_j;
2411     redund->sbuf_a  = sbuf_a;
2412     redund->rbuf_j  = rbuf_j;
2413     redund->rbuf_a  = rbuf_a;
2414 
2415     redund->MatDestroy = C->ops->destroy;
2416     C->ops->destroy    = MatDestroy_MatRedundant;
2417   }
2418   PetscFunctionReturn(0);
2419 }
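
/*
   A usage sketch for the routine above: each subcommunicator ends up with a
   complete copy of the matrix, typically for redundant coarse-grid or direct
   solves.  Assuming a hypothetical matrix A, a subcommunicator subcomm set up
   by the caller (e.g. with PetscSubcomm), and placeholder sizes nsubcomm and
   mlocal:

     Mat Ared;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
*/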
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2423 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2424 {
2425   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2426   PetscErrorCode ierr;
2427   PetscInt       i,*idxb = 0;
2428   PetscScalar    *va,*vb;
2429   Vec            vtmp;
2430 
2431   PetscFunctionBegin;
2432   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2433   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2434   if (idx) {
2435     for (i=0; i<A->rmap->n; i++) {
2436       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2437     }
2438   }
2439 
2440   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2441   if (idx) {
2442     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2443   }
2444   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2445   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2446 
2447   for (i=0; i<A->rmap->n; i++){
2448     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2449       va[i] = vb[i];
2450       if (idx) idx[i] = a->garray[idxb[i]];
2451     }
2452   }
2453 
2454   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2455   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2456   if (idxb) {
2457     ierr = PetscFree(idxb);CHKERRQ(ierr);
2458   }
2459   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2465 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2466 {
2467   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2468   PetscErrorCode ierr;
2469   PetscInt       i,*idxb = 0;
2470   PetscScalar    *va,*vb;
2471   Vec            vtmp;
2472 
2473   PetscFunctionBegin;
2474   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2475   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2476   if (idx) {
2477     for (i=0; i<A->rmap->n; i++) { /* one entry of v per local row */
2478       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2479     }
2480   }
2481 
2482   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2483   if (idx) {
2484     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2485   }
2486   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2487   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2488 
2489   for (i=0; i<A->rmap->n; i++){
2490     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2491       va[i] = vb[i];
2492       if (idx) idx[i] = a->garray[idxb[i]];
2493     }
2494   }
2495 
2496   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2497   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2498   if (idxb) {
2499     ierr = PetscFree(idxb);CHKERRQ(ierr);
2500   }
2501   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2507 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2508 {
2509   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2510   PetscInt       n      = A->rmap->n;
2511   PetscInt       cstart = A->cmap->rstart;
2512   PetscInt      *cmap   = mat->garray;
2513   PetscInt      *diagIdx, *offdiagIdx;
2514   Vec            diagV, offdiagV;
2515   PetscScalar   *a, *diagA, *offdiagA;
2516   PetscInt       r;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2521   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors live on PETSC_COMM_SELF */
2522   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2523   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2524   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2525   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2526   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2527   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2528   for(r = 0; r < n; ++r) {
2529     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2530       a[r]   = diagA[r];
2531       idx[r] = cstart + diagIdx[r];
2532     } else {
2533       a[r]   = offdiagA[r];
2534       idx[r] = cmap[offdiagIdx[r]];
2535     }
2536   }
2537   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2538   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2539   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2540   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2541   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2542   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2548 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2549 {
2550   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2551   PetscInt       n      = A->rmap->n;
2552   PetscInt       cstart = A->cmap->rstart;
2553   PetscInt      *cmap   = mat->garray;
2554   PetscInt      *diagIdx, *offdiagIdx;
2555   Vec            diagV, offdiagV;
2556   PetscScalar   *a, *diagA, *offdiagA;
2557   PetscInt       r;
2558   PetscErrorCode ierr;
2559 
2560   PetscFunctionBegin;
2561   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2562   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors live on PETSC_COMM_SELF */
2563   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2564   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2565   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2566   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2567   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2568   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2569   for(r = 0; r < n; ++r) {
2570     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2571       a[r]   = diagA[r];
2572       idx[r] = cstart + diagIdx[r];
2573     } else {
2574       a[r]   = offdiagA[r];
2575       idx[r] = cmap[offdiagIdx[r]];
2576     }
2577   }
2578   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2579   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2580   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2581   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2582   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2583   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
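
/*
   A usage sketch for the row max/min family above (MatGetRowMax, MatGetRowMin,
   MatGetRowMaxAbs, MatGetRowMinAbs): v receives one value per local row and
   idx, where requested, the global column index of the selected entry.
   Assuming a hypothetical matrix A:

     Vec      rowmax;
     PetscInt m,*loc;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&rowmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(rowmax);CHKERRQ(ierr);
*/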
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2589 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2590 {
2591   PetscErrorCode ierr;
2592   Mat            *dummy;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2596   *newmat = *dummy;
2597   ierr = PetscFree(dummy);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2602 /* -------------------------------------------------------------------*/
2603 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2604        MatGetRow_MPIAIJ,
2605        MatRestoreRow_MPIAIJ,
2606        MatMult_MPIAIJ,
2607 /* 4*/ MatMultAdd_MPIAIJ,
2608        MatMultTranspose_MPIAIJ,
2609        MatMultTransposeAdd_MPIAIJ,
2610 #ifdef PETSC_HAVE_PBGL
2611        MatSolve_MPIAIJ,
2612 #else
2613        0,
2614 #endif
2615        0,
2616        0,
2617 /*10*/ 0,
2618        0,
2619        0,
2620        MatSOR_MPIAIJ,
2621        MatTranspose_MPIAIJ,
2622 /*15*/ MatGetInfo_MPIAIJ,
2623        MatEqual_MPIAIJ,
2624        MatGetDiagonal_MPIAIJ,
2625        MatDiagonalScale_MPIAIJ,
2626        MatNorm_MPIAIJ,
2627 /*20*/ MatAssemblyBegin_MPIAIJ,
2628        MatAssemblyEnd_MPIAIJ,
2629        MatSetOption_MPIAIJ,
2630        MatZeroEntries_MPIAIJ,
2631 /*24*/ MatZeroRows_MPIAIJ,
2632        0,
2633 #ifdef PETSC_HAVE_PBGL
2634        0,
2635 #else
2636        0,
2637 #endif
2638        0,
2639        0,
2640 /*29*/ MatSetUpPreallocation_MPIAIJ,
2641 #ifdef PETSC_HAVE_PBGL
2642        0,
2643 #else
2644        0,
2645 #endif
2646        0,
2647        0,
2648        0,
2649 /*34*/ MatDuplicate_MPIAIJ,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*39*/ MatAXPY_MPIAIJ,
2655        MatGetSubMatrices_MPIAIJ,
2656        MatIncreaseOverlap_MPIAIJ,
2657        MatGetValues_MPIAIJ,
2658        MatCopy_MPIAIJ,
2659 /*44*/ MatGetRowMax_MPIAIJ,
2660        MatScale_MPIAIJ,
2661        0,
2662        0,
2663        0,
2664 /*49*/ MatSetBlockSize_MPIAIJ,
2665        0,
2666        0,
2667        0,
2668        0,
2669 /*54*/ MatFDColoringCreate_MPIAIJ,
2670        0,
2671        MatSetUnfactored_MPIAIJ,
2672        MatPermute_MPIAIJ,
2673        0,
2674 /*59*/ MatGetSubMatrix_MPIAIJ,
2675        MatDestroy_MPIAIJ,
2676        MatView_MPIAIJ,
2677        0,
2678        0,
2679 /*64*/ 0,
2680        0,
2681        0,
2682        0,
2683        0,
2684 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2685        MatGetRowMinAbs_MPIAIJ,
2686        0,
2687        MatSetColoring_MPIAIJ,
2688 #if defined(PETSC_HAVE_ADIC)
2689        MatSetValuesAdic_MPIAIJ,
2690 #else
2691        0,
2692 #endif
2693        MatSetValuesAdifor_MPIAIJ,
2694 /*75*/ MatFDColoringApply_AIJ,
2695        0,
2696        0,
2697        0,
2698        0,
2699 /*80*/ 0,
2700        0,
2701        0,
2702 /*83*/ MatLoad_MPIAIJ,
2703        0,
2704        0,
2705        0,
2706        0,
2707        0,
2708 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2709        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2710        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2711        MatPtAP_Basic,
2712        MatPtAPSymbolic_MPIAIJ,
2713 /*94*/ MatPtAPNumeric_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*99*/ 0,
2719        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2720        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2721        MatConjugate_MPIAIJ,
2722        0,
2723 /*104*/MatSetValuesRow_MPIAIJ,
2724        MatRealPart_MPIAIJ,
2725        MatImaginaryPart_MPIAIJ,
2726        0,
2727        0,
2728 /*109*/0,
2729        MatGetRedundantMatrix_MPIAIJ,
2730        MatGetRowMin_MPIAIJ,
2731        0,
2732        0,
2733 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2734        0,
2735        0,
2736        0,
2737        0,
2738 /*119*/0,
2739        0,
2740        0,
2741        0,
2742        MatGetMultiProcBlock_MPIAIJ
2743 };
2744 
2745 /* ----------------------------------------------------------------------------------------*/
2746 
2747 EXTERN_C_BEGIN
2748 #undef __FUNCT__
2749 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2750 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2751 {
2752   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2753   PetscErrorCode ierr;
2754 
2755   PetscFunctionBegin;
2756   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2757   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2758   PetscFunctionReturn(0);
2759 }
2760 EXTERN_C_END
2761 
2762 EXTERN_C_BEGIN
2763 #undef __FUNCT__
2764 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2765 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2766 {
2767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2768   PetscErrorCode ierr;
2769 
2770   PetscFunctionBegin;
2771   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2772   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2773   PetscFunctionReturn(0);
2774 }
2775 EXTERN_C_END
2776 
2777 EXTERN_C_BEGIN
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2780 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2781 {
2782   Mat_MPIAIJ     *b;
2783   PetscErrorCode ierr;
2784   PetscInt       i;
2785 
2786   PetscFunctionBegin;
2787   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2788   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2789   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2790   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2791 
2792   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2793   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2794   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2795   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2796   if (d_nnz) {
2797     for (i=0; i<B->rmap->n; i++) {
2798       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2799     }
2800   }
2801   if (o_nnz) {
2802     for (i=0; i<B->rmap->n; i++) {
2803       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2804     }
2805   }
2806   b = (Mat_MPIAIJ*)B->data;
2807 
2808   if (!B->preallocated) {
2809     /* Explicitly create 2 MATSEQAIJ matrices. */
2810     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2811     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2812     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2813     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2814     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2815     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2816     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2817     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2818   }
2819 
2820   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2821   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2822   B->preallocated = PETSC_TRUE;
2823   PetscFunctionReturn(0);
2824 }
2825 EXTERN_C_END
2826 
2827 #undef __FUNCT__
2828 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2830 {
2831   Mat            mat;
2832   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2833   PetscErrorCode ierr;
2834 
2835   PetscFunctionBegin;
2836   *newmat       = 0;
2837   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2838   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2839   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2840   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2841   a    = (Mat_MPIAIJ*)mat->data;
2842 
2843   mat->factortype    = matin->factortype;
2844   mat->rmap->bs      = matin->rmap->bs;
2845   mat->assembled    = PETSC_TRUE;
2846   mat->insertmode   = NOT_SET_VALUES;
2847   mat->preallocated = PETSC_TRUE;
2848 
2849   a->size           = oldmat->size;
2850   a->rank           = oldmat->rank;
2851   a->donotstash     = oldmat->donotstash;
2852   a->roworiented    = oldmat->roworiented;
2853   a->rowindices     = 0;
2854   a->rowvalues      = 0;
2855   a->getrowactive   = PETSC_FALSE;
2856 
2857   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2858   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2859 
2860   if (oldmat->colmap) {
2861 #if defined (PETSC_USE_CTABLE)
2862     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2863 #else
2864     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2865     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2866     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2867 #endif
2868   } else a->colmap = 0;
2869   if (oldmat->garray) {
2870     PetscInt len;
2871     len  = oldmat->B->cmap->n;
2872     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2873     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2874     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2875   } else a->garray = 0;
2876 
2877   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2878   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2879   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2880   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2881   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2882   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2883   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2884   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2885   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2886   *newmat = mat;
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 /*
2891     Allows sending/receiving larger messages than 2 gigabytes in a single call
2892 */
2893 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2894 {
2895   int             ierr;
2896   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2897   PetscInt        i,numchunks;
2898   PetscMPIInt     icnt;
2899 
2900   numchunks = cnt/CHUNKSIZE + 1;
2901   for (i=0; i<numchunks; i++) {
2902     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2903     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
2904     if (type == MPIU_INT) {
2905       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2906     } else if (type == MPIU_SCALAR) {
2907       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2908     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2909   }
2910   return 0;
2911 }
2912 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2913 {
2914   int             ierr;
2915   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2916   MPI_Status      status;
2917   PetscInt        i,numchunks;
2918   PetscMPIInt     icnt;
2919 
2920   numchunks = cnt/CHUNKSIZE + 1;
2921   for (i=0; i<numchunks; i++) {
2922     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2923     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
2924     if (type == MPIU_INT) {
2925       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2926     } else if (type == MPIU_SCALAR) {
2927       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2928     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2929   }
2930   return 0;
2931 }
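/*
   A minimal usage sketch of the two helpers above (illustrative only, not part of the
   library): the sender and the receiver must agree on the total count, datatype, and tag
   so the chunking matches on both sides. "bigarray", "n", "tag", and "comm" below are
   hypothetical names.

     if (!rank) {
       ierr = MPILong_Send(bigarray,n,MPIU_INT,1,tag,comm);CHKERRQ(ierr);
     } else if (rank == 1) {
       ierr = MPILong_Recv(bigarray,n,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
     }
*/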
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatLoad_MPIAIJ"
2935 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2936 {
2937   PetscScalar    *vals,*svals;
2938   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2939   PetscErrorCode ierr;
2940   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2941   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2942   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2943   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2944   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2945   int            fd;
2946 
2947   PetscFunctionBegin;
2948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2949   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2950   if (!rank) {
2951     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2952     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2953     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2954   }
2955 
2956   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2957 
2958   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2959   M = header[1]; N = header[2];
2960   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2961   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2962   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2963 
2964   /* If global sizes are set, check if they are consistent with that given in the file */
2965   if (sizesset) {
2966     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2967   }
2968   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2969   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2970 
2971   /* determine ownership of all rows */
2972   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2973   else m = newMat->rmap->n; /* Set by user */
2974 
2975   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2976   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2977 
2978   /* First process needs enough room for process with most rows */
2979   if (!rank) {
2980     mmax       = rowners[1];
2981     for (i=2; i<=size; i++) {
2982       mmax = PetscMax(mmax,rowners[i]);
2983     }
2984   } else mmax = m;
2985 
2986   rowners[0] = 0;
2987   for (i=2; i<=size; i++) {
2988     rowners[i] += rowners[i-1];
2989   }
2990   rstart = rowners[rank];
2991   rend   = rowners[rank+1];
2992 
2993   /* distribute row lengths to all processors */
2994   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2995   if (!rank) {
2996     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2997     ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2998     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2999     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3000     for (j=0; j<m; j++) {
3001       procsnz[0] += ourlens[j];
3002     }
3003     for (i=1; i<size; i++) {
3004       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3005       /* calculate the number of nonzeros on each processor */
3006       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3007         procsnz[i] += rowlengths[j];
3008       }
3009       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3010     }
3011     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3012   } else {
3013     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3014   }
3015 
3016   if (!rank) {
3017     /* determine max buffer needed and allocate it */
3018     maxnz = 0;
3019     for (i=0; i<size; i++) {
3020       maxnz = PetscMax(maxnz,procsnz[i]);
3021     }
3022     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3023 
3024     /* read in my part of the matrix column indices  */
3025     nz   = procsnz[0];
3026     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3027     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3028 
3029     /* read in everyone else's part and ship it off */
3030     for (i=1; i<size; i++) {
3031       nz     = procsnz[i];
3032       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3033       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3034     }
3035     ierr = PetscFree(cols);CHKERRQ(ierr);
3036   } else {
3037     /* determine buffer space needed for message */
3038     nz = 0;
3039     for (i=0; i<m; i++) {
3040       nz += ourlens[i];
3041     }
3042     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3043 
3044     /* receive message of column indices*/
3045     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3046   }
3047 
3048   /* determine column ownership if matrix is not square */
3049   if (N != M) {
3050     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3051     else n = newMat->cmap->n;
3052     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3053     cstart = cend - n;
3054   } else {
3055     cstart = rstart;
3056     cend   = rend;
3057     n      = cend - cstart;
3058   }
3059 
3060   /* loop over local rows, determining number of off diagonal entries */
3061   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3062   jj = 0;
3063   for (i=0; i<m; i++) {
3064     for (j=0; j<ourlens[i]; j++) {
3065       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3066       jj++;
3067     }
3068   }
3069 
3070   for (i=0; i<m; i++) {
3071     ourlens[i] -= offlens[i];
3072   }
3073   if (!sizesset) {
3074     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3075   }
3076   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3077 
3078   for (i=0; i<m; i++) {
3079     ourlens[i] += offlens[i];
3080   }
3081 
3082   if (!rank) {
3083     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3084 
3085     /* read in my part of the matrix numerical values  */
3086     nz   = procsnz[0];
3087     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3088 
3089     /* insert into matrix */
3090     jj      = rstart;
3091     smycols = mycols;
3092     svals   = vals;
3093     for (i=0; i<m; i++) {
3094       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3095       smycols += ourlens[i];
3096       svals   += ourlens[i];
3097       jj++;
3098     }
3099 
3100     /* read in other processors and ship out */
3101     for (i=1; i<size; i++) {
3102       nz     = procsnz[i];
3103       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3104       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3105     }
3106     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3107   } else {
3108     /* receive numeric values */
3109     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3110 
3111     /* receive message of values*/
3112     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3113 
3114     /* insert into matrix */
3115     jj      = rstart;
3116     smycols = mycols;
3117     svals   = vals;
3118     for (i=0; i<m; i++) {
3119       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3120       smycols += ourlens[i];
3121       svals   += ourlens[i];
3122       jj++;
3123     }
3124   }
3125   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3126   ierr = PetscFree(vals);CHKERRQ(ierr);
3127   ierr = PetscFree(mycols);CHKERRQ(ierr);
3128   ierr = PetscFree(rowners);CHKERRQ(ierr);
3129 
3130   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3131   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3132   PetscFunctionReturn(0);
3133 }
3134 
3135 #undef __FUNCT__
3136 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3137 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3138 {
3139   PetscErrorCode ierr;
3140   IS             iscol_local;
3141   PetscInt       csize;
3142 
3143   PetscFunctionBegin;
3144   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3145   if (call == MAT_REUSE_MATRIX) {
3146     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3147     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3148   } else {
3149     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3150   }
3151   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3152   if (call == MAT_INITIAL_MATRIX) {
3153     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3154     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3155   }
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 #undef __FUNCT__
3160 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3161 /*
3162     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3163   process, and then the final result is formed by concatenating the local matrices.
3164   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3165 
3166   Note: This requires a sequential iscol with all indices.
3167 */
3168 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3169 {
3170   PetscErrorCode ierr;
3171   PetscMPIInt    rank,size;
3172   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3173   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3174   Mat            *local,M,Mreuse;
3175   MatScalar      *vwork,*aa;
3176   MPI_Comm       comm = ((PetscObject)mat)->comm;
3177   Mat_SeqAIJ     *aij;
3178 
3179 
3180   PetscFunctionBegin;
3181   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3182   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3183 
3184   if (call ==  MAT_REUSE_MATRIX) {
3185     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3186     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3187     local = &Mreuse;
3188     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3189   } else {
3190     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3191     Mreuse = *local;
3192     ierr   = PetscFree(local);CHKERRQ(ierr);
3193   }
3194 
3195   /*
3196       m - number of local rows
3197       n - number of columns (same on all processors)
3198       rstart - first row in new global matrix generated
3199   */
3200   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3201   if (call == MAT_INITIAL_MATRIX) {
3202     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3203     ii  = aij->i;
3204     jj  = aij->j;
3205 
3206     /*
3207         Determine the number of non-zeros in the diagonal and off-diagonal
3208         portions of the matrix in order to do correct preallocation
3209     */
3210 
3211     /* first get start and end of "diagonal" columns */
3212     if (csize == PETSC_DECIDE) {
3213       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3214       if (mglobal == n) { /* square matrix */
3215 	nlocal = m;
3216       } else {
3217         nlocal = n/size + ((n % size) > rank);
3218       }
3219     } else {
3220       nlocal = csize;
3221     }
3222     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3223     rstart = rend - nlocal;
3224     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3225 
3226     /* next, compute all the lengths */
3227     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3228     olens = dlens + m;
3229     for (i=0; i<m; i++) {
3230       jend = ii[i+1] - ii[i];
3231       olen = 0;
3232       dlen = 0;
3233       for (j=0; j<jend; j++) {
3234         if (*jj < rstart || *jj >= rend) olen++;
3235         else dlen++;
3236         jj++;
3237       }
3238       olens[i] = olen;
3239       dlens[i] = dlen;
3240     }
3241     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3242     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3243     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3244     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3245     ierr = PetscFree(dlens);CHKERRQ(ierr);
3246   } else {
3247     PetscInt ml,nl;
3248 
3249     M = *newmat;
3250     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3251     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3252     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3253     /*
3254          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3255        rather than the slower MatSetValues().
3256     */
3257     M->was_assembled = PETSC_TRUE;
3258     M->assembled     = PETSC_FALSE;
3259   }
3260   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3261   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3262   ii  = aij->i;
3263   jj  = aij->j;
3264   aa  = aij->a;
3265   for (i=0; i<m; i++) {
3266     row   = rstart + i;
3267     nz    = ii[i+1] - ii[i];
3268     cwork = jj;     jj += nz;
3269     vwork = aa;     aa += nz;
3270     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3271   }
3272 
3273   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3274   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3275   *newmat = M;
3276 
3277   /* save submatrix used in processor for next request */
3278   if (call ==  MAT_INITIAL_MATRIX) {
3279     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3280     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3281   }
3282 
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 EXTERN_C_BEGIN
3287 #undef __FUNCT__
3288 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3289 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3290 {
3291   PetscInt       m,cstart, cend,j,nnz,i,d;
3292   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3293   const PetscInt *JJ;
3294   PetscScalar    *values;
3295   PetscErrorCode ierr;
3296 
3297   PetscFunctionBegin;
3298   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3299 
3300   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3301   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3302   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3303   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3304   m      = B->rmap->n;
3305   cstart = B->cmap->rstart;
3306   cend   = B->cmap->rend;
3307   rstart = B->rmap->rstart;
3308 
3309   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3310 
3311 #if defined(PETSC_USE_DEBUG)
3312   for (i=0; i<m; i++) {
3313     nnz     = Ii[i+1]- Ii[i];
3314     JJ      = J + Ii[i];
3315     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3316     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3317     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3318   }
3319 #endif
3320 
3321   for (i=0; i<m; i++) {
3322     nnz     = Ii[i+1]- Ii[i];
3323     JJ      = J + Ii[i];
3324     nnz_max = PetscMax(nnz_max,nnz);
3325     d       = 0;
3326     for (j=0; j<nnz; j++) {
3327       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3328     }
3329     d_nnz[i] = d;
3330     o_nnz[i] = nnz - d;
3331   }
3332   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3333   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3334 
3335   if (v) values = (PetscScalar*)v;
3336   else {
3337     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3338     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3339   }
3340 
3341   for (i=0; i<m; i++) {
3342     ii   = i + rstart;
3343     nnz  = Ii[i+1]- Ii[i];
3344     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3345   }
3346   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3347   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348 
3349   if (!v) {
3350     ierr = PetscFree(values);CHKERRQ(ierr);
3351   }
3352   PetscFunctionReturn(0);
3353 }
3354 EXTERN_C_END
3355 
3356 #undef __FUNCT__
3357 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3358 /*@
3359    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3360    (the default parallel PETSc format).
3361 
3362    Collective on MPI_Comm
3363 
3364    Input Parameters:
3365 +  B - the matrix
3366 .  i - the indices into j for the start of each local row (starts with zero)
3367 .  j - the column indices for each local row (starts with zero)
3368 -  v - optional values in the matrix
3369 
3370    Level: developer
3371 
3372    Notes:
3373        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3374      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3375      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3376 
3377        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3378 
3379        The format used for the sparse matrix input is equivalent to a
3380     row-major ordering, i.e. for the following matrix the input data expected is
3381     as shown:
3382 
3383         1 0 0
3384         2 0 3     P0
3385        -------
3386         4 5 6     P1
3387 
3388      Process0 [P0]: rows_owned=[0,1]
3389         i =  {0,1,3}  [size = nrow+1  = 2+1]
3390         j =  {0,0,2}  [size = nz = 3]
3391         v =  {1,2,3}  [size = nz = 3]
3392 
3393      Process1 [P1]: rows_owned=[2]
3394         i =  {0,3}    [size = nrow+1  = 1+1]
3395         j =  {0,1,2}  [size = nz = 3]
3396         v =  {4,5,6}  [size = nz = 3]
3397 
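   A minimal calling sketch for P0 in the example above (illustrative only; B is assumed
   to already be a MATMPIAIJ matrix with 2 local rows and 3 global columns):

.vb
     PetscInt    i[] = {0,1,3};        /* row offsets for the 2 local rows */
     PetscInt    j[] = {0,0,2};        /* global column indices            */
     PetscScalar v[] = {1.0,2.0,3.0};  /* values, ordered as in j          */

     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
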
3398 .keywords: matrix, aij, compressed row, sparse, parallel
3399 
3400 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3401           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3402 @*/
3403 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3404 {
3405   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3406 
3407   PetscFunctionBegin;
3408   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3409   if (f) {
3410     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3411   }
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 #undef __FUNCT__
3416 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3417 /*@C
3418    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3419    (the default parallel PETSc format).  For good matrix assembly performance
3420    the user should preallocate the matrix storage by setting the parameters
3421    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3422    performance can be increased by more than a factor of 50.
3423 
3424    Collective on MPI_Comm
3425 
3426    Input Parameters:
3427 +  A - the matrix
3428 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3429            (same value is used for all local rows)
3430 .  d_nnz - array containing the number of nonzeros in the various rows of the
3431            DIAGONAL portion of the local submatrix (possibly different for each row)
3432            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3433            The size of this array is equal to the number of local rows, i.e 'm'.
3434            You must leave room for the diagonal entry even if it is zero.
3435 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3436            submatrix (same value is used for all local rows).
3437 -  o_nnz - array containing the number of nonzeros in the various rows of the
3438            OFF-DIAGONAL portion of the local submatrix (possibly different for
3439            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3440            structure. The size of this array is equal to the number
3441            of local rows, i.e 'm'.
3442 
3443    If the *_nnz parameter is given then the *_nz parameter is ignored
3444 
3445    The AIJ format (also called the Yale sparse matrix format or
3446    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3447    storage.  The stored row and column indices begin with zero.
3448    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3449 
3450    The parallel matrix is partitioned such that the first m0 rows belong to
3451    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3452    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3453 
3454    The DIAGONAL portion of the local submatrix of a processor can be defined
3455    as the submatrix which is obtained by extracting the part corresponding to
3456    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3457    first row that belongs to the processor, r2 is the last row belonging to
3458    this processor, and c1-c2 is the range of indices of the local part of a
3459    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3460    common case of a square matrix, the row and column ranges are the same and
3461    the DIAGONAL part is also square. The remaining portion of the local
3462    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3463 
3464    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3465 
3466    You can call MatGetInfo() to get information on how effective the preallocation was;
3467    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3468    You can also run with the option -info and look for messages with the string
3469    malloc in them to see if additional memory allocation was needed.
3470 
3471    Example usage:
3472 
3473    Consider the following 8x8 matrix with 34 non-zero values, that is
3474    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3475    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3476    as follows:
3477 
3478 .vb
3479             1  2  0  |  0  3  0  |  0  4
3480     Proc0   0  5  6  |  7  0  0  |  8  0
3481             9  0 10  | 11  0  0  | 12  0
3482     -------------------------------------
3483            13  0 14  | 15 16 17  |  0  0
3484     Proc1   0 18  0  | 19 20 21  |  0  0
3485             0  0  0  | 22 23  0  | 24  0
3486     -------------------------------------
3487     Proc2  25 26 27  |  0  0 28  | 29  0
3488            30  0  0  | 31 32 33  |  0 34
3489 .ve
3490 
3491    This can be represented as a collection of submatrices as:
3492 
3493 .vb
3494       A B C
3495       D E F
3496       G H I
3497 .ve
3498 
3499    Where the submatrices A,B,C are owned by proc0, D,E,F are
3500    owned by proc1, G,H,I are owned by proc2.
3501 
3502    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3503    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3504    The 'M','N' parameters are 8,8, and have the same values on all procs.
3505 
3506    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3507    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3508    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3509    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3510    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3511    matrix, and [DF] as another SeqAIJ matrix.
3512 
3513    When d_nz, o_nz parameters are specified, d_nz storage elements are
3514    allocated for every row of the local diagonal submatrix, and o_nz
3515    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3516    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3517    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3518    In this case, the values of d_nz,o_nz are:
3519 .vb
3520      proc0 : dnz = 2, o_nz = 2
3521      proc1 : dnz = 3, o_nz = 2
3522      proc2 : dnz = 1, o_nz = 4
3523 .ve
3524    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3525    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3526    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3527    34 values.
3528 
3529    When d_nnz, o_nnz parameters are specified, the storage is specified
3530    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3531    In the above case the values for d_nnz,o_nnz are:
3532 .vb
3533      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3534      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3535      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3536 .ve
3537    Here the space allocated is the sum of all the above values, i.e. 34, and
3538    hence pre-allocation is perfect.
3539 
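   A minimal calling sketch for proc0 in the example above (illustrative only; A is assumed
   to already be a MATMPIAIJ matrix with 3 local rows):
.vb
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};

     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
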
3540    Level: intermediate
3541 
3542 .keywords: matrix, aij, compressed row, sparse, parallel
3543 
3544 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3545           MPIAIJ, MatGetInfo()
3546 @*/
3547 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3548 {
3549   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3550 
3551   PetscFunctionBegin;
3552   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3553   if (f) {
3554     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3555   }
3556   PetscFunctionReturn(0);
3557 }
3558 
3559 #undef __FUNCT__
3560 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3561 /*@
3562      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3563          CSR format the local rows.
3564 
3565    Collective on MPI_Comm
3566 
3567    Input Parameters:
3568 +  comm - MPI communicator
3569 .  m - number of local rows (Cannot be PETSC_DECIDE)
3570 .  n - This value should be the same as the local size used in creating the
3571        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3572        calculated if N is given) For square matrices n is almost always m.
3573 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3574 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3575 .   i - row indices
3576 .   j - column indices
3577 -   a - matrix values
3578 
3579    Output Parameter:
3580 .   mat - the matrix
3581 
3582    Level: intermediate
3583 
3584    Notes:
3585        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3586      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3587      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3588 
3589        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3590 
3591        The format used for the sparse matrix input is equivalent to a
3592     row-major ordering, i.e. for the following matrix the input data expected is
3593     as shown:
3594 
3595         1 0 0
3596         2 0 3     P0
3597        -------
3598         4 5 6     P1
3599 
3600      Process0 [P0]: rows_owned=[0,1]
3601         i =  {0,1,3}  [size = nrow+1  = 2+1]
3602         j =  {0,0,2}  [size = nz = 3]
3603         v =  {1,2,3}  [size = nz = 3]
3604 
3605      Process1 [P1]: rows_owned=[2]
3606         i =  {0,3}    [size = nrow+1  = 1+1]
3607         j =  {0,1,2}  [size = nz = 3]
3608         v =  {4,5,6}  [size = nz = 3]
3609 
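   A minimal calling sketch for P0 in the example above (illustrative only; error handling
   abbreviated). The global matrix is 3x3 and P0 owns 2 rows:

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
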
3610 .keywords: matrix, aij, compressed row, sparse, parallel
3611 
3612 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3613           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3614 @*/
3615 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3616 {
3617   PetscErrorCode ierr;
3618 
3619   PetscFunctionBegin;
3620   if (i[0]) {
3621     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3622   }
3623   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3624   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3625   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3626   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3627   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3628   PetscFunctionReturn(0);
3629 }
3630 
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatCreateMPIAIJ"
3633 /*@C
3634    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3635    (the default parallel PETSc format).  For good matrix assembly performance
3636    the user should preallocate the matrix storage by setting the parameters
3637    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3638    performance can be increased by more than a factor of 50.
3639 
3640    Collective on MPI_Comm
3641 
3642    Input Parameters:
3643 +  comm - MPI communicator
3644 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3645            This value should be the same as the local size used in creating the
3646            y vector for the matrix-vector product y = Ax.
3647 .  n - This value should be the same as the local size used in creating the
3648        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3649        calculated if N is given) For square matrices n is almost always m.
3650 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3651 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3652 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3653            (same value is used for all local rows)
3654 .  d_nnz - array containing the number of nonzeros in the various rows of the
3655            DIAGONAL portion of the local submatrix (possibly different for each row)
3656            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3657            The size of this array is equal to the number of local rows, i.e 'm'.
3658            You must leave room for the diagonal entry even if it is zero.
3659 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3660            submatrix (same value is used for all local rows).
3661 -  o_nnz - array containing the number of nonzeros in the various rows of the
3662            OFF-DIAGONAL portion of the local submatrix (possibly different for
3663            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3664            structure. The size of this array is equal to the number
3665            of local rows, i.e 'm'.
3666 
3667    Output Parameter:
3668 .  A - the matrix
3669 
3670    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3671    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3672    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3673 
3674    Notes:
3675    If the *_nnz parameter is given then the *_nz parameter is ignored
3676 
3677    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3678    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3679    storage requirements for this matrix.
3680 
3681    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3682    processor then it must be used on all processors that share the object for
3683    that argument.
3684 
3685    The user MUST specify either the local or global matrix dimensions
3686    (possibly both).
3687 
3688    The parallel matrix is partitioned across processors such that the
3689    first m0 rows belong to process 0, the next m1 rows belong to
3690    process 1, the next m2 rows belong to process 2 etc.. where
3691    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3692    values corresponding to [m x N] submatrix.
3693 
3694    The columns are logically partitioned with the n0 columns belonging
3695    to 0th partition, the next n1 columns belonging to the next
3696    partition etc., where n0,n1,n2... are the input parameter 'n'.
3697 
3698    The DIAGONAL portion of the local submatrix on any given processor
3699    is the submatrix corresponding to the rows and columns m,n
3700    corresponding to the given processor. i.e diagonal matrix on
3701    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3702    etc. The remaining portion of the local submatrix [m x (N-n)]
3703    constitute the OFF-DIAGONAL portion. The example below better
3704    illustrates this concept.
3705 
3706    For a square global matrix we define each processor's diagonal portion
3707    to be its local rows and the corresponding columns (a square submatrix);
3708    each processor's off-diagonal portion encompasses the remainder of the
3709    local matrix (a rectangular submatrix).
3710 
3711    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3712 
3713    When calling this routine with a single process communicator, a matrix of
3714    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3715    type of communicator, use the construction mechanism:
3716      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3717 
3718    By default, this format uses inodes (identical nodes) when possible.
3719    We search for consecutive rows with the same nonzero structure, thereby
3720    reusing matrix information to achieve increased efficiency.
3721 
3722    Options Database Keys:
3723 +  -mat_no_inode  - Do not use inodes
3724 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3725 -  -mat_aij_oneindex - Internally use indexing starting at 1
3726         rather than 0.  Note that when calling MatSetValues(),
3727         the user still MUST index entries starting at 0!
3728 
3729 
3730    Example usage:
3731 
3732    Consider the following 8x8 matrix with 34 non-zero values, that is
3733    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3734    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3735    as follows:
3736 
3737 .vb
3738             1  2  0  |  0  3  0  |  0  4
3739     Proc0   0  5  6  |  7  0  0  |  8  0
3740             9  0 10  | 11  0  0  | 12  0
3741     -------------------------------------
3742            13  0 14  | 15 16 17  |  0  0
3743     Proc1   0 18  0  | 19 20 21  |  0  0
3744             0  0  0  | 22 23  0  | 24  0
3745     -------------------------------------
3746     Proc2  25 26 27  |  0  0 28  | 29  0
3747            30  0  0  | 31 32 33  |  0 34
3748 .ve
3749 
3750    This can be represented as a collection of submatrices as:
3751 
3752 .vb
3753       A B C
3754       D E F
3755       G H I
3756 .ve
3757 
3758    Where the submatrices A,B,C are owned by proc0, D,E,F are
3759    owned by proc1, G,H,I are owned by proc2.
3760 
3761    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3762    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3763    The 'M','N' parameters are 8,8, and have the same values on all procs.
3764 
3765    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3766    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3767    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3768    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3769    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3770    matrix, and [DF] as another SeqAIJ matrix.
3771 
3772    When d_nz, o_nz parameters are specified, d_nz storage elements are
3773    allocated for every row of the local diagonal submatrix, and o_nz
3774    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3775    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3776    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3777    In this case, the values of d_nz,o_nz are:
3778 .vb
3779      proc0 : dnz = 2, o_nz = 2
3780      proc1 : dnz = 3, o_nz = 2
3781      proc2 : dnz = 1, o_nz = 4
3782 .ve
3783    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3784    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3785    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3786    34 values.
3787 
3788    When d_nnz, o_nnz parameters are specified, the storage is specified
3789    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3790    In the above case the values for d_nnz,o_nnz are:
3791 .vb
3792      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3793      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3794      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3795 .ve
3796    Here the space allocated is the sum of all the above values, i.e. 34, and
3797    hence pre-allocation is perfect.
3798 
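   A minimal calling sketch for proc0 in the 8x8 example above (illustrative only; error
   handling abbreviated):
.vb
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     Mat      A;

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
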
3799    Level: intermediate
3800 
3801 .keywords: matrix, aij, compressed row, sparse, parallel
3802 
3803 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3804           MPIAIJ, MatCreateMPIAIJWithArrays()
3805 @*/
3806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3807 {
3808   PetscErrorCode ierr;
3809   PetscMPIInt    size;
3810 
3811   PetscFunctionBegin;
3812   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3813   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3814   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3815   if (size > 1) {
3816     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3817     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3818   } else {
3819     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3820     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3821   }
3822   PetscFunctionReturn(0);
3823 }
3824 
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3827 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3828 {
3829   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3830 
3831   PetscFunctionBegin;
3832   *Ad     = a->A;
3833   *Ao     = a->B;
3834   *colmap = a->garray;
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3840 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3841 {
3842   PetscErrorCode ierr;
3843   PetscInt       i;
3844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3845 
3846   PetscFunctionBegin;
3847   if (coloring->ctype == IS_COLORING_GLOBAL) {
3848     ISColoringValue *allcolors,*colors;
3849     ISColoring      ocoloring;
3850 
3851     /* set coloring for diagonal portion */
3852     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3853 
3854     /* set coloring for off-diagonal portion */
3855     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3856     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3857     for (i=0; i<a->B->cmap->n; i++) {
3858       colors[i] = allcolors[a->garray[i]];
3859     }
3860     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3861     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3862     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3863     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3864   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3865     ISColoringValue *colors;
3866     PetscInt        *larray;
3867     ISColoring      ocoloring;
3868 
3869     /* set coloring for diagonal portion */
3870     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3871     for (i=0; i<a->A->cmap->n; i++) {
3872       larray[i] = i + A->cmap->rstart;
3873     }
3874     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3875     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3876     for (i=0; i<a->A->cmap->n; i++) {
3877       colors[i] = coloring->colors[larray[i]];
3878     }
3879     ierr = PetscFree(larray);CHKERRQ(ierr);
3880     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3881     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3882     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3883 
3884     /* set coloring for off-diagonal portion */
3885     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3886     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3887     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3888     for (i=0; i<a->B->cmap->n; i++) {
3889       colors[i] = coloring->colors[larray[i]];
3890     }
3891     ierr = PetscFree(larray);CHKERRQ(ierr);
3892     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3893     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3894     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3895   } else {
3896     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3897   }
3898 
3899   PetscFunctionReturn(0);
3900 }
3901 
3902 #if defined(PETSC_HAVE_ADIC)
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3905 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3906 {
3907   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3908   PetscErrorCode ierr;
3909 
3910   PetscFunctionBegin;
3911   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3912   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3913   PetscFunctionReturn(0);
3914 }
3915 #endif
3916 
3917 #undef __FUNCT__
3918 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3919 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3920 {
3921   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3922   PetscErrorCode ierr;
3923 
3924   PetscFunctionBegin;
3925   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3926   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3927   PetscFunctionReturn(0);
3928 }
3929 
3930 #undef __FUNCT__
3931 #define __FUNCT__ "MatMerge"
3932 /*@
3933       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3934                  matrices from each processor
3935 
3936     Collective on MPI_Comm
3937 
3938    Input Parameters:
3939 +    comm - the communicator the parallel matrix will live on
3940 .    inmat - the input sequential matrix (one per process)
3941 .    n - number of local columns (or PETSC_DECIDE)
3942 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3943 
3944    Output Parameter:
3945 .    outmat - the parallel matrix generated
3946 
3947     Level: advanced
3948 
3949    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3950 
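   A minimal usage sketch (illustrative only; "seqmat" is a hypothetical local MATSEQAIJ
   matrix with the same number of columns on every process). Note that, as implemented
   below, the input matrix is destroyed by this routine:
.vb
     Mat parmat;

     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);CHKERRQ(ierr);
.ve
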
3951 @*/
3952 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3953 {
3954   PetscErrorCode ierr;
3955   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3956   PetscInt       *indx;
3957   PetscScalar    *values;
3958 
3959   PetscFunctionBegin;
3960   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3961   if (scall == MAT_INITIAL_MATRIX){
3962     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3963     if (n == PETSC_DECIDE){
3964       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3965     }
3966     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3967     rstart -= m;
3968 
3969     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3970     for (i=0;i<m;i++) {
3971       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3972       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3973       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3974     }
3975     /* This routine will ONLY return MPIAIJ type matrix */
3976     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3977     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3978     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3979     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3980     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3981 
3982   } else if (scall == MAT_REUSE_MATRIX){
3983     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3984   } else {
3985     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3986   }
3987 
3988   for (i=0;i<m;i++) {
3989     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3990     Ii    = i + rstart;
3991     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3992     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3993   }
3994   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3995   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3996   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3997 
3998   PetscFunctionReturn(0);
3999 }
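/*
   Example usage of MatMerge() (editor's sketch, not part of the original source;
   CHKERRQ() error checking omitted, mlocal/Nglobal/nz are illustrative values).
   Each process builds a SeqAIJ matrix holding its own rows with the full global
   number of columns, and MatMerge() stacks them into one MPIAIJ matrix:

      Mat local,global;
      MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,Nglobal,nz,PETSC_NULL,&local);
      ... insert and assemble the local rows of "local" ...
      MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);
      ... use "global"; note that MatMerge() destroys "local" ...
      MatDestroy(global);
*/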
4000 
4001 #undef __FUNCT__
4002 #define __FUNCT__ "MatFileSplit"
4003 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4004 {
4005   PetscErrorCode    ierr;
4006   PetscMPIInt       rank;
4007   PetscInt          m,N,i,rstart,nnz;
4008   size_t            len;
4009   const PetscInt    *indx;
4010   PetscViewer       out;
4011   char              *name;
4012   Mat               B;
4013   const PetscScalar *values;
4014 
4015   PetscFunctionBegin;
4016   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4017   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4018   /* Should this be the type of the diagonal block of A? */
4019   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4020   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4021   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4022   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4023   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4024   for (i=0;i<m;i++) {
4025     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4026     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4027     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4028   }
4029   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4030   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4031 
4032   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4033   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4034   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4035   sprintf(name,"%s.%d",outfile,rank);
4036   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4037   ierr = PetscFree(name);CHKERRQ(ierr);
4038   ierr = MatView(B,out);CHKERRQ(ierr);
4039   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4040   ierr = MatDestroy(B);CHKERRQ(ierr);
4041   PetscFunctionReturn(0);
4042 }
4043 
4044 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4045 #undef __FUNCT__
4046 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4047 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4048 {
4049   PetscErrorCode       ierr;
4050   Mat_Merge_SeqsToMPI  *merge;
4051   PetscContainer       container;
4052 
4053   PetscFunctionBegin;
4054   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4055   if (container) {
4056     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4057     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4058     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4059     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4060     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4061     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4062     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4063     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4064     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4065     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4066     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4067     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4068     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4069     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4070 
4071     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4072     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4073     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
4074   }
4075 
4076   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
4079 
4080 #include "../src/mat/utils/freespace.h"
4081 #include "petscbt.h"
4082 
4083 #undef __FUNCT__
4084 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4085 /*@C
4086       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
4087                  matrices from each process
4088 
4089     Collective on MPI_Comm
4090 
4091    Input Parameters:
4092 +    comm - the communicator the parallel matrix will live on
4093 .    seqmat - the input sequential matrix (each process contributes its own)
4094 .    m - number of local rows (or PETSC_DECIDE)
4095 .    n - number of local columns (or PETSC_DECIDE)
4096 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4097 
4098    Output Parameter:
4099 .    mpimat - the parallel matrix generated
4100 
4101     Level: advanced
4102 
4103    Notes:
4104      The dimensions of the sequential matrix on each process MUST be the same.
4105      The input seqmat is kept in the container attached to mpimat under the key "MatMergeSeqsToMPI"
4106      and is destroyed when mpimat is destroyed. Call PetscObjectQuery() to access it.
4107 @*/
4108 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4109 {
4110   PetscErrorCode       ierr;
4111   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4112   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4113   PetscMPIInt          size,rank,taga,*len_s;
4114   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4115   PetscInt             proc,m;
4116   PetscInt             **buf_ri,**buf_rj;
4117   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4118   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4119   MPI_Request          *s_waits,*r_waits;
4120   MPI_Status           *status;
4121   MatScalar            *aa=a->a;
4122   MatScalar            **abuf_r,*ba_i;
4123   Mat_Merge_SeqsToMPI  *merge;
4124   PetscContainer       container;
4125 
4126   PetscFunctionBegin;
4127   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4128 
4129   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4130   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4131 
4132   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4133   if (container) {
4134     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4135   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing the MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4136   bi     = merge->bi;
4137   bj     = merge->bj;
4138   buf_ri = merge->buf_ri;
4139   buf_rj = merge->buf_rj;
4140 
4141   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4142   owners = merge->rowmap->range;
4143   len_s  = merge->len_s;
4144 
4145   /* send and recv matrix values */
4146   /*-----------------------------*/
4147   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4148   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4149 
4150   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4151   for (proc=0,k=0; proc<size; proc++){
4152     if (!len_s[proc]) continue;
4153     i = owners[proc];
4154     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4155     k++;
4156   }
4157 
4158   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4159   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4160   ierr = PetscFree(status);CHKERRQ(ierr);
4161 
4162   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4163   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4164 
4165   /* insert mat values of mpimat */
4166   /*----------------------------*/
4167   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4168   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4169 
4170   for (k=0; k<merge->nrecv; k++){
4171     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4172     nrows = *(buf_ri_k[k]);
4173     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4174     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4175   }
4176 
4177   /* set values of ba */
4178   m = merge->rowmap->n;
4179   for (i=0; i<m; i++) {
4180     arow = owners[rank] + i;
4181     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4182     bnzi = bi[i+1] - bi[i];
4183     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4184 
4185     /* add local non-zero vals of this proc's seqmat into ba */
4186     anzi = ai[arow+1] - ai[arow];
4187     aj   = a->j + ai[arow];
4188     aa   = a->a + ai[arow];
4189     nextaj = 0;
4190     for (j=0; nextaj<anzi; j++){
4191       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4192         ba_i[j] += aa[nextaj++];
4193       }
4194     }
4195 
4196     /* add received vals into ba */
4197     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4198       /* i-th row */
4199       if (i == *nextrow[k]) {
4200         anzi = *(nextai[k]+1) - *nextai[k];
4201         aj   = buf_rj[k] + *(nextai[k]);
4202         aa   = abuf_r[k] + *(nextai[k]);
4203         nextaj = 0;
4204         for (j=0; nextaj<anzi; j++){
4205           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4206             ba_i[j] += aa[nextaj++];
4207           }
4208         }
4209         nextrow[k]++; nextai[k]++;
4210       }
4211     }
4212     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4213   }
4214   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4215   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4216 
4217   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4218   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4219   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4220   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4221   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4222   PetscFunctionReturn(0);
4223 }
4224 
4225 #undef __FUNCT__
4226 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4227 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4228 {
4229   PetscErrorCode       ierr;
4230   Mat                  B_mpi;
4231   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4232   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4233   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4234   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4235   PetscInt             len,proc,*dnz,*onz;
4236   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4237   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4238   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4239   MPI_Status           *status;
4240   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4241   PetscBT              lnkbt;
4242   Mat_Merge_SeqsToMPI  *merge;
4243   PetscContainer       container;
4244 
4245   PetscFunctionBegin;
4246   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4247 
4248   /* make sure it is a PETSc comm */
4249   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4251   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4252 
4253   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4254   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4255 
4256   /* determine row ownership */
4257   /*---------------------------------------------------------*/
4258   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4259   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4260   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4261   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4262   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4263   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4264   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4265 
4266   m      = merge->rowmap->n;
4267   M      = merge->rowmap->N;
4268   owners = merge->rowmap->range;
4269 
4270   /* determine the number of messages to send, their lengths */
4271   /*---------------------------------------------------------*/
4272   len_s  = merge->len_s;
4273 
4274   len = 0;  /* length of buf_si[] */
4275   merge->nsend = 0;
4276   for (proc=0; proc<size; proc++){
4277     len_si[proc] = 0;
4278     if (proc == rank){
4279       len_s[proc] = 0;
4280     } else {
4281       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4282       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4283     }
4284     if (len_s[proc]) {
4285       merge->nsend++;
4286       nrows = 0;
4287       for (i=owners[proc]; i<owners[proc+1]; i++){
4288         if (ai[i+1] > ai[i]) nrows++;
4289       }
4290       len_si[proc] = 2*(nrows+1);
4291       len += len_si[proc];
4292     }
4293   }
4294 
4295   /* determine the number and length of messages to receive for ij-structure */
4296   /*-------------------------------------------------------------------------*/
4297   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4298   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4299 
4300   /* post the Irecv of j-structure */
4301   /*-------------------------------*/
4302   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4303   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4304 
4305   /* post the Isend of j-structure */
4306   /*--------------------------------*/
4307   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4308 
4309   for (proc=0, k=0; proc<size; proc++){
4310     if (!len_s[proc]) continue;
4311     i = owners[proc];
4312     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4313     k++;
4314   }
4315 
4316   /* receives and sends of j-structure are complete */
4317   /*------------------------------------------------*/
4318   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4319   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4320 
4321   /* send and recv i-structure */
4322   /*---------------------------*/
4323   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4324   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4325 
4326   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4327   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4328   for (proc=0,k=0; proc<size; proc++){
4329     if (!len_s[proc]) continue;
4330     /* form outgoing message for i-structure:
4331          buf_si[0]:                 nrows to be sent
4332                [1:nrows]:           row index (global)
4333                [nrows+1:2*nrows+1]: i-structure index
4334     */
4335     /*-------------------------------------------*/
4336     nrows = len_si[proc]/2 - 1;
4337     buf_si_i    = buf_si + nrows+1;
4338     buf_si[0]   = nrows;
4339     buf_si_i[0] = 0;
4340     nrows = 0;
4341     for (i=owners[proc]; i<owners[proc+1]; i++){
4342       anzi = ai[i+1] - ai[i];
4343       if (anzi) {
4344         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4345         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4346         nrows++;
4347       }
4348     }
4349     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4350     k++;
4351     buf_si += len_si[proc];
4352   }
4353 
4354   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4355   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4356 
4357   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4358   for (i=0; i<merge->nrecv; i++){
4359     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4360   }
4361 
4362   ierr = PetscFree(len_si);CHKERRQ(ierr);
4363   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4364   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4365   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4366   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4367   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4368   ierr = PetscFree(status);CHKERRQ(ierr);
4369 
4370   /* compute a local seq matrix in each processor */
4371   /*----------------------------------------------*/
4372   /* allocate bi array and free space for accumulating nonzero column info */
4373   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4374   bi[0] = 0;
4375 
4376   /* create and initialize a linked list */
4377   nlnk = N+1;
4378   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4379 
4380   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4381   len = 0;
4382   len  = ai[owners[rank+1]] - ai[owners[rank]];
4383   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4384   current_space = free_space;
4385 
4386   /* determine symbolic info for each local row */
4387   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4388 
4389   for (k=0; k<merge->nrecv; k++){
4390     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4391     nrows = *buf_ri_k[k];
4392     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4393     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4394   }
4395 
4396   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4397   len = 0;
4398   for (i=0;i<m;i++) {
4399     bnzi   = 0;
4400     /* add local non-zero cols of this proc's seqmat into lnk */
4401     arow   = owners[rank] + i;
4402     anzi   = ai[arow+1] - ai[arow];
4403     aj     = a->j + ai[arow];
4404     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4405     bnzi += nlnk;
4406     /* add received col data into lnk */
4407     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4408       if (i == *nextrow[k]) { /* i-th row */
4409         anzi = *(nextai[k]+1) - *nextai[k];
4410         aj   = buf_rj[k] + *nextai[k];
4411         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4412         bnzi += nlnk;
4413         nextrow[k]++; nextai[k]++;
4414       }
4415     }
4416     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4417 
4418     /* if free space is not available, make more free space */
4419     if (current_space->local_remaining<bnzi) {
4420       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4421       nspacedouble++;
4422     }
4423     /* copy data into free space, then initialize lnk */
4424     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4425     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4426 
4427     current_space->array           += bnzi;
4428     current_space->local_used      += bnzi;
4429     current_space->local_remaining -= bnzi;
4430 
4431     bi[i+1] = bi[i] + bnzi;
4432   }
4433 
4434   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4435 
4436   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4437   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4438   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4439 
4440   /* create symbolic parallel matrix B_mpi */
4441   /*---------------------------------------*/
4442   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4443   if (n==PETSC_DECIDE) {
4444     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4445   } else {
4446     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4447   }
4448   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4449   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4450   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4451 
4452   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4453   B_mpi->assembled     = PETSC_FALSE;
4454   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4455   merge->bi            = bi;
4456   merge->bj            = bj;
4457   merge->buf_ri        = buf_ri;
4458   merge->buf_rj        = buf_rj;
4459   merge->coi           = PETSC_NULL;
4460   merge->coj           = PETSC_NULL;
4461   merge->owners_co     = PETSC_NULL;
4462 
4463   /* attach the supporting struct to B_mpi for reuse */
4464   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4465   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4466   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4467   *mpimat = B_mpi;
4468 
4469   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4470   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4471   PetscFunctionReturn(0);
4472 }
4473 
4474 #undef __FUNCT__
4475 #define __FUNCT__ "MatMerge_SeqsToMPI"
4476 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4477 {
4478   PetscErrorCode   ierr;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4482   if (scall == MAT_INITIAL_MATRIX){
4483     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4484   }
4485   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4486   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
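/*
   Example usage of MatMerge_SeqsToMPI() (editor's sketch, not part of the original
   source; error checking omitted, mlocal/nlocal are illustrative local sizes).  The
   symbolic phase runs only for MAT_INITIAL_MATRIX; its result is kept in the
   "MatMergeSeqsToMPI" container so a MAT_REUSE_MATRIX call repeats just the numeric merge:

      Mat seqmat,mpimat;
      ... each process assembles its SeqAIJ matrix seqmat ...
      MatMerge_SeqsToMPI(comm,seqmat,mlocal,nlocal,MAT_INITIAL_MATRIX,&mpimat);
      ... change the numerical values of seqmat, keeping the same nonzero pattern ...
      MatMerge_SeqsToMPI(comm,seqmat,mlocal,nlocal,MAT_REUSE_MATRIX,&mpimat);
      MatDestroy(mpimat);
*/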
4489 
4490 #undef __FUNCT__
4491 #define __FUNCT__ "MatGetLocalMat"
4492 /*@
4493      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A
4494 
4495     Not Collective
4496 
4497    Input Parameters:
4498 +    A - the matrix
4499 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4500 
4501    Output Parameter:
4502 .    A_loc - the local sequential matrix generated
4503 
4504     Level: developer
4505 
4506 @*/
4507 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4508 {
4509   PetscErrorCode  ierr;
4510   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4511   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4512   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4513   MatScalar       *aa=a->a,*ba=b->a,*cam;
4514   PetscScalar     *ca;
4515   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4516   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4517 
4518   PetscFunctionBegin;
4519   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4520   if (scall == MAT_INITIAL_MATRIX){
4521     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4522     ci[0] = 0;
4523     for (i=0; i<am; i++){
4524       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4525     }
4526     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4527     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4528     k = 0;
4529     for (i=0; i<am; i++) {
4530       ncols_o = bi[i+1] - bi[i];
4531       ncols_d = ai[i+1] - ai[i];
4532       /* off-diagonal portion of A */
4533       for (jo=0; jo<ncols_o; jo++) {
4534         col = cmap[*bj];
4535         if (col >= cstart) break;
4536         cj[k]   = col; bj++;
4537         ca[k++] = *ba++;
4538       }
4539       /* diagonal portion of A */
4540       for (j=0; j<ncols_d; j++) {
4541         cj[k]   = cstart + *aj++;
4542         ca[k++] = *aa++;
4543       }
4544       /* off-diagonal portion of A */
4545       for (j=jo; j<ncols_o; j++) {
4546         cj[k]   = cmap[*bj++];
4547         ca[k++] = *ba++;
4548       }
4549     }
4550     /* put together the new matrix */
4551     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4552     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4553     /* Since these are PETSc arrays, change flags to free them as necessary. */
4554     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4555     mat->free_a  = PETSC_TRUE;
4556     mat->free_ij = PETSC_TRUE;
4557     mat->nonew   = 0;
4558   } else if (scall == MAT_REUSE_MATRIX){
4559     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4560     ci = mat->i; cj = mat->j; cam = mat->a;
4561     for (i=0; i<am; i++) {
4562       /* off-diagonal portion of A */
4563       ncols_o = bi[i+1] - bi[i];
4564       for (jo=0; jo<ncols_o; jo++) {
4565         col = cmap[*bj];
4566         if (col >= cstart) break;
4567         *cam++ = *ba++; bj++;
4568       }
4569       /* diagonal portion of A */
4570       ncols_d = ai[i+1] - ai[i];
4571       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4572       /* off-diagonal portion of A */
4573       for (j=jo; j<ncols_o; j++) {
4574         *cam++ = *ba++; bj++;
4575       }
4576     }
4577   } else {
4578     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4579   }
4580 
4581   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4582   PetscFunctionReturn(0);
4583 }
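/*
   Example usage of MatGetLocalMat() (editor's sketch, not part of the original
   source; error checking omitted):

      Mat A_loc;
      MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... A changes numerically but keeps its nonzero pattern ...
      MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(A_loc);
*/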
4584 
4585 #undef __FUNCT__
4586 #define __FUNCT__ "MatGetLocalMatCondensed"
4587 /*@C
4588      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A and its NON-ZERO columns
4589 
4590     Not Collective
4591 
4592    Input Parameters:
4593 +    A - the matrix
4594 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4595 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4596 
4597    Output Parameter:
4598 .    A_loc - the local sequential matrix generated
4599 
4600     Level: developer
4601 
4602 @*/
4603 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4604 {
4605   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4606   PetscErrorCode    ierr;
4607   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4608   IS                isrowa,iscola;
4609   Mat               *aloc;
4610 
4611   PetscFunctionBegin;
4612   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4613   if (!row){
4614     start = A->rmap->rstart; end = A->rmap->rend;
4615     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4616   } else {
4617     isrowa = *row;
4618   }
4619   if (!col){
4620     start = A->cmap->rstart;
4621     cmap  = a->garray;
4622     nzA   = a->A->cmap->n;
4623     nzB   = a->B->cmap->n;
4624     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4625     ncols = 0;
4626     for (i=0; i<nzB; i++) {
4627       if (cmap[i] < start) idx[ncols++] = cmap[i];
4628       else break;
4629     }
4630     imark = i;
4631     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4632     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4633     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4634     ierr = PetscFree(idx);CHKERRQ(ierr);
4635   } else {
4636     iscola = *col;
4637   }
4638   if (scall != MAT_INITIAL_MATRIX){
4639     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4640     aloc[0] = *A_loc;
4641   }
4642   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4643   *A_loc = aloc[0];
4644   ierr = PetscFree(aloc);CHKERRQ(ierr);
4645   if (!row){
4646     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4647   }
4648   if (!col){
4649     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4650   }
4651   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4652   PetscFunctionReturn(0);
4653 }
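/*
   Example usage of MatGetLocalMatCondensed() (editor's sketch, not part of the
   original source; error checking omitted).  Passing PETSC_NULL for row and col
   lets the routine build the default index sets (all local rows, all nonzero
   columns) itself:

      Mat A_loc;
      MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
      ... work with the condensed local matrix ...
      MatDestroy(A_loc);
*/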
4654 
4655 #undef __FUNCT__
4656 #define __FUNCT__ "MatGetBrowsOfAcols"
4657 /*@C
4658     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4659 
4660     Collective on Mat
4661 
4662    Input Parameters:
4663 +    A,B - the matrices in mpiaij format
4664 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4665 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4666 
4667    Output Parameters:
4668 +    rowb, colb - index sets of rows and columns of B to extract
4669 .    brstart - index of the first row of B_seq that comes from B's own local rows (the next B->rmap->n rows of B_seq)
4670 -    B_seq - the sequential matrix generated
4671 
4672     Level: developer
4673 
4674 @*/
4675 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4676 {
4677   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4678   PetscErrorCode    ierr;
4679   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4680   IS                isrowb,iscolb;
4681   Mat               *bseq;
4682 
4683   PetscFunctionBegin;
4684   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4685     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4686   }
4687   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4688 
4689   if (scall == MAT_INITIAL_MATRIX){
4690     start = A->cmap->rstart;
4691     cmap  = a->garray;
4692     nzA   = a->A->cmap->n;
4693     nzB   = a->B->cmap->n;
4694     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4695     ncols = 0;
4696     for (i=0; i<nzB; i++) {  /* row < local row index */
4697       if (cmap[i] < start) idx[ncols++] = cmap[i];
4698       else break;
4699     }
4700     imark = i;
4701     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4702     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4703     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4704     ierr = PetscFree(idx);CHKERRQ(ierr);
4705     *brstart = imark;
4706     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4707   } else {
4708     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4709     isrowb = *rowb; iscolb = *colb;
4710     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4711     bseq[0] = *B_seq;
4712   }
4713   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4714   *B_seq = bseq[0];
4715   ierr = PetscFree(bseq);CHKERRQ(ierr);
4716   if (!rowb){
4717     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4718   } else {
4719     *rowb = isrowb;
4720   }
4721   if (!colb){
4722     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4723   } else {
4724     *colb = iscolb;
4725   }
4726   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4727   PetscFunctionReturn(0);
4728 }
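/*
   Example usage of MatGetBrowsOfAcols() (editor's sketch, not part of the original
   source; error checking omitted).  The index sets created by the first call are
   returned through rowb and colb and must be passed back in for MAT_REUSE_MATRIX:

      Mat      B_seq;
      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ... B changes numerically but keeps its nonzero pattern ...
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ISDestroy(rowb); ISDestroy(colb); MatDestroy(B_seq);
*/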
4729 
4730 #undef __FUNCT__
4731 #define __FUNCT__ "MatGetBrowsOfAoCols"
4732 /*@C
4733     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4734     of the OFF-DIAGONAL portion of local A
4735 
4736     Collective on Mat
4737 
4738    Input Parameters:
4739 +    A,B - the matrices in mpiaij format
4740 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4741 .    startsj - starting points in B's outgoing (send) j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4742 .    startsj_r - similar to startsj, but for the incoming (receive) side
4743 -    bufa_ptr - buffer for the outgoing matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4744 
4745    Output Parameter:
4746 .    B_oth - the sequential matrix generated
4747 
4748     Level: developer
4749 
4750 @*/
4751 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4752 {
4753   VecScatter_MPI_General *gen_to,*gen_from;
4754   PetscErrorCode         ierr;
4755   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4756   Mat_SeqAIJ             *b_oth;
4757   VecScatter             ctx=a->Mvctx;
4758   MPI_Comm               comm=((PetscObject)ctx)->comm;
4759   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4760   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4761   PetscScalar            *rvalues,*svalues;
4762   MatScalar              *b_otha,*bufa,*bufA;
4763   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4764   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4765   MPI_Status             *sstatus,rstatus;
4766   PetscMPIInt            jj;
4767   PetscInt               *cols,sbs,rbs;
4768   PetscScalar            *vals;
4769 
4770   PetscFunctionBegin;
4771   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4772     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4773   }
4774   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4775   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4776 
4777   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4778   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4779   rvalues  = gen_from->values; /* holds the length of receiving row */
4780   svalues  = gen_to->values;   /* holds the length of sending row */
4781   nrecvs   = gen_from->n;
4782   nsends   = gen_to->n;
4783 
4784   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4785   srow     = gen_to->indices;   /* local row index to be sent */
4786   sstarts  = gen_to->starts;
4787   sprocs   = gen_to->procs;
4788   sstatus  = gen_to->sstatus;
4789   sbs      = gen_to->bs;
4790   rstarts  = gen_from->starts;
4791   rprocs   = gen_from->procs;
4792   rbs      = gen_from->bs;
4793 
4794   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4795   if (scall == MAT_INITIAL_MATRIX){
4796     /* i-array */
4797     /*---------*/
4798     /*  post receives */
4799     for (i=0; i<nrecvs; i++){
4800       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4801       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4802       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4803     }
4804 
4805     /* pack the outgoing message */
4806     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4807     sstartsj[0] = 0;  rstartsj[0] = 0;
4808     len = 0; /* total length of j or a array to be sent */
4809     k = 0;
4810     for (i=0; i<nsends; i++){
4811       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4812       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4813       for (j=0; j<nrows; j++) {
4814         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4815         for (l=0; l<sbs; l++){
4816           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4817           rowlen[j*sbs+l] = ncols;
4818           len += ncols;
4819           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4820         }
4821         k++;
4822       }
4823       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4824       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4825     }
4826     /* recvs and sends of i-array are completed */
4827     i = nrecvs;
4828     while (i--) {
4829       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4830     }
4831     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4832 
4833     /* allocate buffers for sending j and a arrays */
4834     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4835     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4836 
4837     /* create i-array of B_oth */
4838     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4839     b_othi[0] = 0;
4840     len = 0; /* total length of j or a array to be received */
4841     k = 0;
4842     for (i=0; i<nrecvs; i++){
4843       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4844       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4845       for (j=0; j<nrows; j++) {
4846         b_othi[k+1] = b_othi[k] + rowlen[j];
4847         len += rowlen[j]; k++;
4848       }
4849       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4850     }
4851 
4852     /* allocate space for the j and a arrays of B_oth */
4853     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4854     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4855 
4856     /* j-array */
4857     /*---------*/
4858     /*  post receives of j-array */
4859     for (i=0; i<nrecvs; i++){
4860       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4861       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4862     }
4863 
4864     /* pack the outgoing message j-array */
4865     k = 0;
4866     for (i=0; i<nsends; i++){
4867       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4868       bufJ = bufj+sstartsj[i];
4869       for (j=0; j<nrows; j++) {
4870         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4871         for (ll=0; ll<sbs; ll++){
4872           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4873           for (l=0; l<ncols; l++){
4874             *bufJ++ = cols[l];
4875           }
4876           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4877         }
4878       }
4879       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4880     }
4881 
4882     /* recvs and sends of j-array are completed */
4883     i = nrecvs;
4884     while (i--) {
4885       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4886     }
4887     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4888   } else if (scall == MAT_REUSE_MATRIX){
4889     sstartsj = *startsj;
4890     rstartsj = *startsj_r;
4891     bufa     = *bufa_ptr;
4892     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4893     b_otha   = b_oth->a;
4894   } else {
4895     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4896   }
4897 
4898   /* a-array */
4899   /*---------*/
4900   /*  post receives of a-array */
4901   for (i=0; i<nrecvs; i++){
4902     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4903     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4904   }
4905 
4906   /* pack the outgoing message a-array */
4907   k = 0;
4908   for (i=0; i<nsends; i++){
4909     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4910     bufA = bufa+sstartsj[i];
4911     for (j=0; j<nrows; j++) {
4912       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4913       for (ll=0; ll<sbs; ll++){
4914         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4915         for (l=0; l<ncols; l++){
4916           *bufA++ = vals[l];
4917         }
4918         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4919       }
4920     }
4921     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4922   }
4923   /* recvs and sends of a-array are completed */
4924   i = nrecvs;
4925   while (i--) {
4926     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4927   }
4928   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4929   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4930 
4931   if (scall == MAT_INITIAL_MATRIX){
4932     /* put together the new matrix */
4933     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4934 
4935     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4936     /* Since these are PETSc arrays, change flags to free them as necessary. */
4937     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4938     b_oth->free_a  = PETSC_TRUE;
4939     b_oth->free_ij = PETSC_TRUE;
4940     b_oth->nonew   = 0;
4941 
4942     ierr = PetscFree(bufj);CHKERRQ(ierr);
4943     if (!startsj || !bufa_ptr){
4944       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4945       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4946     } else {
4947       *startsj   = sstartsj;
4948       *startsj_r = rstartsj;
4949       *bufa_ptr  = bufa;
4950     }
4951   }
4952   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4953   PetscFunctionReturn(0);
4954 }
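/*
   Example usage of MatGetBrowsOfAoCols() (editor's sketch, not part of the original
   source; error checking omitted).  The startsj, startsj_r and bufa arrays produced
   by the first call are handed back so a MAT_REUSE_MATRIX call can skip the
   symbolic communication; the caller eventually frees them:

      Mat       B_oth;
      PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;
      MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);
      ... B changes numerically but keeps its nonzero pattern ...
      MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);
      MatDestroy(B_oth);
      PetscFree2(startsj,startsj_r); PetscFree(bufa);
*/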
4955 
4956 #undef __FUNCT__
4957 #define __FUNCT__ "MatGetCommunicationStructs"
4958 /*@C
4959   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4960 
4961   Not Collective
4962 
4963   Input Parameter:
4964 . A - The matrix in mpiaij format
4965 
4966   Output Parameters:
4967 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4968 . colmap - A map from global column index to local index into lvec
4969 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4970 
4971   Level: developer
4972 
4973 @*/
4974 #if defined (PETSC_USE_CTABLE)
4975 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4976 #else
4977 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4978 #endif
4979 {
4980   Mat_MPIAIJ *a;
4981 
4982   PetscFunctionBegin;
4983   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4984   PetscValidPointer(lvec, 2);
4985   PetscValidPointer(colmap, 3);
4986   PetscValidPointer(multScatter, 4);
4987   a = (Mat_MPIAIJ *) A->data;
4988   if (lvec) *lvec = a->lvec;
4989   if (colmap) *colmap = a->colmap;
4990   if (multScatter) *multScatter = a->Mvctx;
4991   PetscFunctionReturn(0);
4992 }
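/*
   Example usage of MatGetCommunicationStructs() (editor's sketch, not part of the
   original source).  The returned objects are the matrix's own internal structures
   and must not be destroyed by the caller:

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
*/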
4993 
4994 EXTERN_C_BEGIN
4995 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4996 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4997 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4998 EXTERN_C_END
4999 
5000 #undef __FUNCT__
5001 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5002 /*
5003     Computes C = A*B as (B'*A')', since computing the dense*sparse product A*B directly is untenable
5004 
5005                n                       p                          p
5006         (              )       (              )         (                  )
5007       m (      A       )  *  n (       B      )   =   m (         C        )
5008         (              )       (              )         (                  )
5009 
5010 */
5011 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5012 {
5013   PetscErrorCode     ierr;
5014   Mat                At,Bt,Ct;
5015 
5016   PetscFunctionBegin;
5017   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5018   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5019   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5020   ierr = MatDestroy(At);CHKERRQ(ierr);
5021   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5022   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5023   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5024   PetscFunctionReturn(0);
5025 }
5026 
5027 #undef __FUNCT__
5028 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5029 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5030 {
5031   PetscErrorCode ierr;
5032   PetscInt       m=A->rmap->n,n=B->cmap->n;
5033   Mat            Cmat;
5034 
5035   PetscFunctionBegin;
5036   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5037   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5038   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5039   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5040   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5041   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5042   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5043   *C   = Cmat;
5044   PetscFunctionReturn(0);
5045 }
5046 
5047 /* ----------------------------------------------------------------*/
5048 #undef __FUNCT__
5049 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5050 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5051 {
5052   PetscErrorCode ierr;
5053 
5054   PetscFunctionBegin;
5055   if (scall == MAT_INITIAL_MATRIX){
5056     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5057   }
5058   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5059   PetscFunctionReturn(0);
5060 }
5061 
5062 EXTERN_C_BEGIN
5063 #if defined(PETSC_HAVE_MUMPS)
5064 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5065 #endif
5066 #if defined(PETSC_HAVE_PASTIX)
5067 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5068 #endif
5069 #if defined(PETSC_HAVE_SUPERLU_DIST)
5070 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5071 #endif
5072 #if defined(PETSC_HAVE_SPOOLES)
5073 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5074 #endif
5075 EXTERN_C_END
5076 
5077 /*MC
5078    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5079 
5080    Options Database Keys:
5081 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5082 
5083   Level: beginner
5084 
5085 .seealso: MatCreateMPIAIJ()
5086 M*/
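/*
   A minimal sketch of selecting this matrix type at run time (editor's
   illustration, not part of the original source; error checking omitted,
   M and N are illustrative global sizes):

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetFromOptions(A);
      ... run the program with -mat_type mpiaij ...
*/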
5087 
5088 EXTERN_C_BEGIN
5089 #undef __FUNCT__
5090 #define __FUNCT__ "MatCreate_MPIAIJ"
5091 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5092 {
5093   Mat_MPIAIJ     *b;
5094   PetscErrorCode ierr;
5095   PetscMPIInt    size;
5096 
5097   PetscFunctionBegin;
5098   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5099 
5100   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5101   B->data         = (void*)b;
5102   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5103   B->rmap->bs     = 1;
5104   B->assembled    = PETSC_FALSE;
5105   B->mapping      = 0;
5106 
5107   B->insertmode   = NOT_SET_VALUES;
5108   b->size         = size;
5109   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5110 
5111   /* build cache for off array entries formed */
5112   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5113   b->donotstash  = PETSC_FALSE;
5114   b->colmap      = 0;
5115   b->garray      = 0;
5116   b->roworiented = PETSC_TRUE;
5117 
5118   /* stuff used for matrix vector multiply */
5119   b->lvec      = PETSC_NULL;
5120   b->Mvctx     = PETSC_NULL;
5121 
5122   /* stuff for MatGetRow() */
5123   b->rowindices   = 0;
5124   b->rowvalues    = 0;
5125   b->getrowactive = PETSC_FALSE;
5126 
5127 #if defined(PETSC_HAVE_SPOOLES)
5128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5129                                      "MatGetFactor_mpiaij_spooles",
5130                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5131 #endif
5132 #if defined(PETSC_HAVE_MUMPS)
5133   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5134                                      "MatGetFactor_aij_mumps",
5135                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5136 #endif
5137 #if defined(PETSC_HAVE_PASTIX)
5138   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5139 					   "MatGetFactor_mpiaij_pastix",
5140 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5141 #endif
5142 #if defined(PETSC_HAVE_SUPERLU_DIST)
5143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5144                                      "MatGetFactor_mpiaij_superlu_dist",
5145                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5146 #endif
5147   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5148                                      "MatStoreValues_MPIAIJ",
5149                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5150   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5151                                      "MatRetrieveValues_MPIAIJ",
5152                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5153   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5154 				     "MatGetDiagonalBlock_MPIAIJ",
5155                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5156   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5157 				     "MatIsTranspose_MPIAIJ",
5158 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5159   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5160 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5161 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5162   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5163 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5164 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5165   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5166 				     "MatDiagonalScaleLocal_MPIAIJ",
5167 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5168   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5169                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5170                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5171   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5172                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5173                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5174   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5175                                      "MatConvert_MPIAIJ_MPISBAIJ",
5176                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5177   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5178                                      "MatMatMult_MPIDense_MPIAIJ",
5179                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5180   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5181                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5182                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5183   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5184                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5185                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5186   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5187   PetscFunctionReturn(0);
5188 }
5189 EXTERN_C_END
5190 
5191 #undef __FUNCT__
5192 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5193 /*@
5194      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5195          and "off-diagonal" part of the matrix in CSR format.
5196 
5197    Collective on MPI_Comm
5198 
5199    Input Parameters:
5200 +  comm - MPI communicator
5201 .  m - number of local rows (Cannot be PETSC_DECIDE)
5202 .  n - This value should be the same as the local size used in creating the
5203        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5204        it calculated if N is given). For square matrices n is almost always m.
5205 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5206 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5207 .   i - row indices for "diagonal" portion of matrix
5208 .   j - column indices
5209 .   a - matrix values
5210 .   oi - row indices for "off-diagonal" portion of matrix
5211 .   oj - column indices
5212 -   oa - matrix values
5213 
5214    Output Parameter:
5215 .   mat - the matrix
5216 
5217    Level: advanced
5218 
5219    Notes:
5220        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5221 
5222        The i and j indices are 0 based
5223 
5224        See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix
5225 
5226        This sets local rows and cannot be used to set off-processor values.
5227 
5228        You cannot later use MatSetValues() to change values in this matrix.
5229 
5230 .keywords: matrix, aij, compressed row, sparse, parallel
5231 
5232 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5233           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5234 @*/
5235 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5236 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5237 {
5238   PetscErrorCode ierr;
5239   Mat_MPIAIJ     *maij;
5240 
5241   PetscFunctionBegin;
5242   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5243   if (i[0]) {
5244     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5245   }
5246   if (oi[0]) {
5247     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5248   }
5249   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5250   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5251   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5252   maij = (Mat_MPIAIJ*) (*mat)->data;
5253   maij->donotstash     = PETSC_TRUE;
5254   (*mat)->preallocated = PETSC_TRUE;
5255 
5256   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5257   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5258   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5259   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5260 
5261   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5262   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5263 
5264   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5265   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5266   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5267   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5268 
5269   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5270   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5271   PetscFunctionReturn(0);
5272 }
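/*
   Example usage of MatCreateMPIAIJWithSplitArrays() (editor's sketch, not part of
   the original source).  Each process supplies CSR arrays i,j,a for its "diagonal"
   block and oi,oj,oa for its "off-diagonal" block, with oj holding global column
   indices; the arrays are used directly, so they must remain valid while the
   matrix exists:

      Mat A;
      MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                     i,j,a,oi,oj,oa,&A);
      ... use A; do not free or modify i,j,a,oi,oj,oa while A is in use ...
      MatDestroy(A);
*/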
5273 
5274 /*
5275     Special version for direct calls from Fortran
5276 */
5277 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5278 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5279 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5280 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5281 #endif
5282 
5283 /* Change these macros so can be used in void function */
5284 #undef CHKERRQ
5285 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5286 #undef SETERRQ2
5287 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5288 #undef SETERRQ
5289 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5290 
5291 EXTERN_C_BEGIN
5292 #undef __FUNCT__
5293 #define __FUNCT__ "matsetvaluesmpiaij_"
5294 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5295 {
5296   Mat             mat = *mmat;
5297   PetscInt        m = *mm, n = *mn;
5298   InsertMode      addv = *maddv;
5299   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5300   PetscScalar     value;
5301   PetscErrorCode  ierr;
5302 
5303   ierr = MatPreallocated(mat);CHKERRQ(ierr);
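  /* record whether values are being added or inserted; mixing the two modes within
     one assembly is an error (checked below in debug mode) */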
5304   if (mat->insertmode == NOT_SET_VALUES) {
5305     mat->insertmode = addv;
5306   }
5307 #if defined(PETSC_USE_DEBUG)
5308   else if (mat->insertmode != addv) {
5309     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5310   }
5311 #endif
5312   {
5313   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5314   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5315   PetscBool       roworiented = aij->roworiented;
5316 
5317   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5318   Mat             A = aij->A;
5319   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5320   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5321   MatScalar       *aa = a->a;
5322   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5323   Mat             B = aij->B;
5324   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5325   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5326   MatScalar       *ba = b->a;
5327 
5328   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5329   PetscInt        nonew = a->nonew;
5330   MatScalar       *ap1,*ap2;
5331 
5332   PetscFunctionBegin;
5333   for (i=0; i<m; i++) {
5334     if (im[i] < 0) continue;
5335 #if defined(PETSC_USE_DEBUG)
5336     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5337 #endif
5338     if (im[i] >= rstart && im[i] < rend) {
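      /* locally owned row: set up pointers into this row of the diagonal (A) and
         off-diagonal (B) blocks for the insertion macros used below */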
5339       row      = im[i] - rstart;
5340       lastcol1 = -1;
5341       rp1      = aj + ai[row];
5342       ap1      = aa + ai[row];
5343       rmax1    = aimax[row];
5344       nrow1    = ailen[row];
5345       low1     = 0;
5346       high1    = nrow1;
5347       lastcol2 = -1;
5348       rp2      = bj + bi[row];
5349       ap2      = ba + bi[row];
5350       rmax2    = bimax[row];
5351       nrow2    = bilen[row];
5352       low2     = 0;
5353       high2    = nrow2;
5354 
5355       for (j=0; j<n; j++) {
5356         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5357         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5358         if (in[j] >= cstart && in[j] < cend){
5359           col = in[j] - cstart;
5360           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5361         } else if (in[j] < 0) continue;
5362 #if defined(PETSC_USE_DEBUG)
5363         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5364 #endif
5365         else {
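          /* off-diagonal entry: after a previous assembly B uses a compacted local
             column numbering, so translate the global column index through the colmap;
             otherwise the global index is stored directly */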
5366           if (mat->was_assembled) {
5367             if (!aij->colmap) {
5368               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5369             }
5370 #if defined (PETSC_USE_CTABLE)
5371             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5372 	    col--;
5373 #else
5374             col = aij->colmap[in[j]] - 1;
5375 #endif
5376             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5377               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5378               col =  in[j];
5379               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5380               B = aij->B;
5381               b = (Mat_SeqAIJ*)B->data;
5382               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
5383               rp2      = bj + bi[row];
5384               ap2      = ba + bi[row];
5385               rmax2    = bimax[row];
5386               nrow2    = bilen[row];
5387               low2     = 0;
5388               high2    = nrow2;
5389               bm       = aij->B->rmap->n;
5391             }
5392           } else col = in[j];
5393           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5394         }
5395       }
5396     } else {
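      /* row owned by another process: stash the values for exchange at assembly time
         (skipped when stashing is disabled for this matrix) */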
5397       if (!aij->donotstash) {
5398         if (roworiented) {
5399           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5400         } else {
5401           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool )(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5402         }
5403       }
5404     }
5405   }}
5406   PetscFunctionReturnVoid();
5407 }
5408 EXTERN_C_END
5409 
5410