xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 38aed534d607aa3ff7c8eaf51b631a23b914446d)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap.n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap.n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap.n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap.n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
311   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap.n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
415   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselfs, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap.range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap.n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added in yy until the next line, */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap.N;
932     header[2] = mat->cmap.N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap.n;
938     range = mat->rmap.range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap.n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap.n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* )th processor needs space a largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap.n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap.n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap.rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap.rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows,*rows;
1261   int            ntids;
1262   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1267   /* make a collective version of 'rowp' */
1268   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1269   if (pcomm==comm) {
1270     crowp = rowp;
1271   } else {
1272     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1273     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1274     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1275     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1276   }
1277   /* collect the global row permutation and invert it */
1278   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1279   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1280   if (pcomm!=comm) {
1281     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1282   }
1283   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1284   /* get the local target indices */
1285   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1288   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1289   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1290   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1291   /* the column permutation is so much easier;
1292      make a local version of 'colp' and invert it */
1293   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1294   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1295   if (ntids==1) {
1296     lcolp = colp;
1297   } else {
1298     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1299     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1300     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1301   }
1302   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1303   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1304   if (ntids>1) {
1305     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1306     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1307   }
1308   /* now we just get the submatrix */
1309   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1310   /* clean up */
1311   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1312   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1319 {
1320   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1321   Mat            A = mat->A,B = mat->B;
1322   PetscErrorCode ierr;
1323   PetscReal      isend[5],irecv[5];
1324 
1325   PetscFunctionBegin;
1326   info->block_size     = 1.0;
1327   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1328   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1329   isend[3] = info->memory;  isend[4] = info->mallocs;
1330   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1331   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1332   isend[3] += info->memory;  isend[4] += info->mallocs;
1333   if (flag == MAT_LOCAL) {
1334     info->nz_used      = isend[0];
1335     info->nz_allocated = isend[1];
1336     info->nz_unneeded  = isend[2];
1337     info->memory       = isend[3];
1338     info->mallocs      = isend[4];
1339   } else if (flag == MAT_GLOBAL_MAX) {
1340     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1341     info->nz_used      = irecv[0];
1342     info->nz_allocated = irecv[1];
1343     info->nz_unneeded  = irecv[2];
1344     info->memory       = irecv[3];
1345     info->mallocs      = irecv[4];
1346   } else if (flag == MAT_GLOBAL_SUM) {
1347     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1348     info->nz_used      = irecv[0];
1349     info->nz_allocated = irecv[1];
1350     info->nz_unneeded  = irecv[2];
1351     info->memory       = irecv[3];
1352     info->mallocs      = irecv[4];
1353   }
1354   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1355   info->fill_ratio_needed = 0;
1356   info->factor_mallocs    = 0;
1357   info->rows_global       = (double)matin->rmap.N;
1358   info->columns_global    = (double)matin->cmap.N;
1359   info->rows_local        = (double)matin->rmap.n;
1360   info->columns_local     = (double)matin->cmap.N;
1361 
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetOption_MPIAIJ"
1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1368 {
1369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   switch (op) {
1374   case MAT_NEW_NONZERO_LOCATIONS:
1375   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1376   case MAT_KEEP_ZEROED_ROWS:
1377   case MAT_NEW_NONZERO_LOCATION_ERR:
1378   case MAT_USE_INODES:
1379   case MAT_IGNORE_ZERO_ENTRIES:
1380     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1381     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1382     break;
1383   case MAT_ROW_ORIENTED:
1384     a->roworiented = flg;
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_NEW_DIAGONALS:
1389     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1390     break;
1391   case MAT_IGNORE_OFF_PROC_ENTRIES:
1392     a->donotstash = PETSC_TRUE;
1393     break;
1394   case MAT_SYMMETRIC:
1395     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1396     break;
1397   case MAT_STRUCTURALLY_SYMMETRIC:
1398   case MAT_HERMITIAN:
1399   case MAT_SYMMETRY_ETERNAL:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   default:
1403     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetRow_MPIAIJ"
1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1414   PetscErrorCode ierr;
1415   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1416   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1417   PetscInt       *cmap,*idx_p;
1418 
1419   PetscFunctionBegin;
1420   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1421   mat->getrowactive = PETSC_TRUE;
1422 
1423   if (!mat->rowvalues && (idx || v)) {
1424     /*
1425         allocate enough space to hold information from the longest row.
1426     */
1427     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1428     PetscInt     max = 1,tmp;
1429     for (i=0; i<matin->rmap.n; i++) {
1430       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1431       if (max < tmp) { max = tmp; }
1432     }
1433     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1434     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1435   }
1436 
1437   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1438   lrow = row - rstart;
1439 
1440   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1441   if (!v)   {pvA = 0; pvB = 0;}
1442   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1443   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1444   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1445   nztot = nzA + nzB;
1446 
1447   cmap  = mat->garray;
1448   if (v  || idx) {
1449     if (nztot) {
1450       /* Sort by increasing column numbers, assuming A and B already sorted */
1451       PetscInt imark = -1;
1452       if (v) {
1453         *v = v_p = mat->rowvalues;
1454         for (i=0; i<nzB; i++) {
1455           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1456           else break;
1457         }
1458         imark = i;
1459         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1460         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1461       }
1462       if (idx) {
1463         *idx = idx_p = mat->rowindices;
1464         if (imark > -1) {
1465           for (i=0; i<imark; i++) {
1466             idx_p[i] = cmap[cworkB[i]];
1467           }
1468         } else {
1469           for (i=0; i<nzB; i++) {
1470             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1471             else break;
1472           }
1473           imark = i;
1474         }
1475         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1476         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1477       }
1478     } else {
1479       if (idx) *idx = 0;
1480       if (v)   *v   = 0;
1481     }
1482   }
1483   *nz = nztot;
1484   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!aij->getrowactive) {
1497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1498   }
1499   aij->getrowactive = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatNorm_MPIAIJ"
1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1506 {
1507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1508   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1509   PetscErrorCode ierr;
1510   PetscInt       i,j,cstart = mat->cmap.rstart;
1511   PetscReal      sum = 0.0;
1512   MatScalar      *v;
1513 
1514   PetscFunctionBegin;
1515   if (aij->size == 1) {
1516     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1517   } else {
1518     if (type == NORM_FROBENIUS) {
1519       v = amat->a;
1520       for (i=0; i<amat->nz; i++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1523 #else
1524         sum += (*v)*(*v); v++;
1525 #endif
1526       }
1527       v = bmat->a;
1528       for (i=0; i<bmat->nz; i++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1531 #else
1532         sum += (*v)*(*v); v++;
1533 #endif
1534       }
1535       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1536       *norm = sqrt(*norm);
1537     } else if (type == NORM_1) { /* max column norm */
1538       PetscReal *tmp,*tmp2;
1539       PetscInt  *jj,*garray = aij->garray;
1540       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1541       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1542       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1543       *norm = 0.0;
1544       v = amat->a; jj = amat->j;
1545       for (j=0; j<amat->nz; j++) {
1546         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1547       }
1548       v = bmat->a; jj = bmat->j;
1549       for (j=0; j<bmat->nz; j++) {
1550         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1551       }
1552       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1553       for (j=0; j<mat->cmap.N; j++) {
1554         if (tmp2[j] > *norm) *norm = tmp2[j];
1555       }
1556       ierr = PetscFree(tmp);CHKERRQ(ierr);
1557       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1558     } else if (type == NORM_INFINITY) { /* max row norm */
1559       PetscReal ntemp = 0.0;
1560       for (j=0; j<aij->A->rmap.n; j++) {
1561         v = amat->a + amat->i[j];
1562         sum = 0.0;
1563         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         v = bmat->a + bmat->i[j];
1567         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1568           sum += PetscAbsScalar(*v); v++;
1569         }
1570         if (sum > ntemp) ntemp = sum;
1571       }
1572       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1573     } else {
1574       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIAIJ"
1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1583 {
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1585   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1586   PetscErrorCode ierr;
1587   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1588   PetscInt       cstart=A->cmap.rstart,ncol;
1589   Mat            B;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594 
1595   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1596   ai = Aloc->i; aj = Aloc->j;
1597   bi = Bloc->i; bj = Bloc->j;
1598   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1599     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1600     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1601     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1602     for (i=0; i<ai[ma]; i++){
1603       d_nnz[aj[i]] ++;
1604       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1605     }
1606 
1607     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1608     ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1609     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1610     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1611     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1612   } else {
1613     B = *matout;
1614   }
1615 
1616   /* copy over the A part */
1617   array = Aloc->a;
1618   row = A->rmap.rstart;
1619   for (i=0; i<ma; i++) {
1620     ncol = ai[i+1]-ai[i];
1621     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1622     row++; array += ncol; aj += ncol;
1623   }
1624   aj = Aloc->j;
1625   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1626 
1627   /* copy over the B part */
1628   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1629   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1630   array = Bloc->a;
1631   row = A->rmap.rstart;
1632   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1633   for (i=0; i<mb; i++) {
1634     ncol = bi[i+1]-bi[i];
1635     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1636     row++; array += ncol; cols += ncol;
1637   }
1638   ierr = PetscFree(cols);CHKERRQ(ierr);
1639 
1640   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   if (*matout != A) {
1643     *matout = B;
1644   } else {
1645     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1646   }
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1652 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1653 {
1654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1655   Mat            a = aij->A,b = aij->B;
1656   PetscErrorCode ierr;
1657   PetscInt       s1,s2,s3;
1658 
1659   PetscFunctionBegin;
1660   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1661   if (rr) {
1662     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1663     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1664     /* Overlap communication with computation. */
1665     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666   }
1667   if (ll) {
1668     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1669     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1670     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1671   }
1672   /* scale  the diagonal block */
1673   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1674 
1675   if (rr) {
1676     /* Do a scatter end and then right scale the off-diagonal block */
1677     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1679   }
1680 
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1686 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1687 {
1688   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1689   PetscErrorCode ierr;
1690 
1691   PetscFunctionBegin;
1692   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1693   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1698 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1699 {
1700   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1701   PetscErrorCode ierr;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatEqual_MPIAIJ"
1710 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1711 {
1712   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1713   Mat            a,b,c,d;
1714   PetscTruth     flg;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   a = matA->A; b = matA->B;
1719   c = matB->A; d = matB->B;
1720 
1721   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1722   if (flg) {
1723     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1724   }
1725   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatCopy_MPIAIJ"
1731 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1732 {
1733   PetscErrorCode ierr;
1734   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1735   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1736 
1737   PetscFunctionBegin;
1738   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1739   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1740     /* because of the column compression in the off-processor part of the matrix a->B,
1741        the number of columns in a->B and b->B may be different, hence we cannot call
1742        the MatCopy() directly on the two parts. If need be, we can provide a more
1743        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1744        then copying the submatrices */
1745     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1746   } else {
1747     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1748     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1755 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1756 {
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #include "petscblaslapack.h"
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatAXPY_MPIAIJ"
1767 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1768 {
1769   PetscErrorCode ierr;
1770   PetscInt       i;
1771   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1772   PetscBLASInt   bnz,one=1;
1773   Mat_SeqAIJ     *x,*y;
1774 
1775   PetscFunctionBegin;
1776   if (str == SAME_NONZERO_PATTERN) {
1777     PetscScalar alpha = a;
1778     x = (Mat_SeqAIJ *)xx->A->data;
1779     y = (Mat_SeqAIJ *)yy->A->data;
1780     bnz = PetscBLASIntCast(x->nz);
1781     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1782     x = (Mat_SeqAIJ *)xx->B->data;
1783     y = (Mat_SeqAIJ *)yy->B->data;
1784     bnz = PetscBLASIntCast(x->nz);
1785     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1786   } else if (str == SUBSET_NONZERO_PATTERN) {
1787     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1788 
1789     x = (Mat_SeqAIJ *)xx->B->data;
1790     y = (Mat_SeqAIJ *)yy->B->data;
1791     if (y->xtoy && y->XtoY != xx->B) {
1792       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1793       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1794     }
1795     if (!y->xtoy) { /* get xtoy */
1796       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1797       y->XtoY = xx->B;
1798       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1799     }
1800     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1801   } else {
1802     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatConjugate_MPIAIJ"
1811 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1812 {
1813 #if defined(PETSC_USE_COMPLEX)
1814   PetscErrorCode ierr;
1815   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1816 
1817   PetscFunctionBegin;
1818   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1819   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1820 #else
1821   PetscFunctionBegin;
1822 #endif
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatRealPart_MPIAIJ"
1828 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1829 {
1830   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1831   PetscErrorCode ierr;
1832 
1833   PetscFunctionBegin;
1834   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1835   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1841 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1842 {
1843   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1844   PetscErrorCode ierr;
1845 
1846   PetscFunctionBegin;
1847   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1848   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #ifdef PETSC_HAVE_PBGL
1853 
1854 #include <boost/parallel/mpi/bsp_process_group.hpp>
1855 #include <boost/graph/distributed/ilu_default_graph.hpp>
1856 #include <boost/graph/distributed/ilu_0_block.hpp>
1857 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1858 #include <boost/graph/distributed/petsc/interface.hpp>
1859 #include <boost/multi_array.hpp>
1860 #include <boost/parallel/distributed_property_map.hpp>
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1864 /*
1865   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1866 */
1867 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1868 {
1869   namespace petsc = boost::distributed::petsc;
1870 
1871   namespace graph_dist = boost::graph::distributed;
1872   using boost::graph::distributed::ilu_default::process_group_type;
1873   using boost::graph::ilu_permuted;
1874 
1875   PetscTruth      row_identity, col_identity;
1876   PetscContainer  c;
1877   PetscInt        m, n, M, N;
1878   PetscErrorCode  ierr;
1879 
1880   PetscFunctionBegin;
1881   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1882   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1883   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1884   if (!row_identity || !col_identity) {
1885     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1886   }
1887 
1888   process_group_type pg;
1889   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1890   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1891   lgraph_type&   level_graph = *lgraph_p;
1892   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1893 
1894   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1895   ilu_permuted(level_graph);
1896 
1897   /* put together the new matrix */
1898   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1899   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1900   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1901   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1902   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1903   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1904   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905   (*fact)->factor = FACTOR_LU;
1906 
1907   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1908   ierr = PetscContainerSetPointer(c, lgraph_p);
1909   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1915 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1916 {
1917   PetscFunctionBegin;
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatSolve_MPIAIJ"
1923 /*
1924   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1925 */
1926 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1927 {
1928   namespace graph_dist = boost::graph::distributed;
1929 
1930   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1931   lgraph_type*   lgraph_p;
1932   PetscContainer c;
1933   PetscErrorCode ierr;
1934 
1935   PetscFunctionBegin;
1936   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1937   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1938   ierr = VecCopy(b, x); CHKERRQ(ierr);
1939 
1940   PetscScalar* array_x;
1941   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1942   PetscInt sx;
1943   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1944 
1945   PetscScalar* array_b;
1946   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1947   PetscInt sb;
1948   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1949 
1950   lgraph_type&   level_graph = *lgraph_p;
1951   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1952 
1953   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1954   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1955                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1956 
1957   typedef boost::iterator_property_map<array_ref_type::iterator,
1958                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1959   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1960                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1961 
1962   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1963 
1964   PetscFunctionReturn(0);
1965 }
1966 #endif
1967 
1968 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1969   PetscInt       nzlocal,nsends,nrecvs;
1970   PetscMPIInt    *send_rank;
1971   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1972   PetscScalar    *sbuf_a,**rbuf_a;
1973   PetscErrorCode (*MatDestroy)(Mat);
1974 } Mat_Redundant;
1975 
1976 #undef __FUNCT__
1977 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1978 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1979 {
1980   PetscErrorCode       ierr;
1981   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1982   PetscInt             i;
1983 
1984   PetscFunctionBegin;
1985   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1986   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1987   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1988   for (i=0; i<redund->nrecvs; i++){
1989     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1990     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1991   }
1992   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1993   ierr = PetscFree(redund);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatDestroy_MatRedundant"
1999 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2000 {
2001   PetscErrorCode  ierr;
2002   PetscContainer  container;
2003   Mat_Redundant   *redund=PETSC_NULL;
2004 
2005   PetscFunctionBegin;
2006   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2007   if (container) {
2008     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2009   } else {
2010     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2011   }
2012   A->ops->destroy = redund->MatDestroy;
2013   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2014   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2015   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2021 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2022 {
2023   PetscMPIInt    rank,size;
2024   MPI_Comm       comm=((PetscObject)mat)->comm;
2025   PetscErrorCode ierr;
2026   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2027   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2028   PetscInt       *rowrange=mat->rmap.range;
2029   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2030   Mat            A=aij->A,B=aij->B,C=*matredundant;
2031   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2032   PetscScalar    *sbuf_a;
2033   PetscInt       nzlocal=a->nz+b->nz;
2034   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2035   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
2036   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2037   MatScalar      *aworkA,*aworkB;
2038   PetscScalar    *vals;
2039   PetscMPIInt    tag1,tag2,tag3,imdex;
2040   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2041                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2042   MPI_Status     recv_status,*send_status;
2043   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2044   PetscInt       **rbuf_j=PETSC_NULL;
2045   PetscScalar    **rbuf_a=PETSC_NULL;
2046   Mat_Redundant  *redund=PETSC_NULL;
2047   PetscContainer container;
2048 
2049   PetscFunctionBegin;
2050   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2051   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2052 
2053   if (reuse == MAT_REUSE_MATRIX) {
2054     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2055     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2056     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2057     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2058     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2059     if (container) {
2060       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2061     } else {
2062       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2063     }
2064     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2065 
2066     nsends    = redund->nsends;
2067     nrecvs    = redund->nrecvs;
2068     send_rank = redund->send_rank; recv_rank = send_rank + size;
2069     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2070     sbuf_j    = redund->sbuf_j;
2071     sbuf_a    = redund->sbuf_a;
2072     rbuf_j    = redund->rbuf_j;
2073     rbuf_a    = redund->rbuf_a;
2074   }
2075 
2076   if (reuse == MAT_INITIAL_MATRIX){
2077     PetscMPIInt  subrank,subsize;
2078     PetscInt     nleftover,np_subcomm;
2079     /* get the destination processors' id send_rank, nsends and nrecvs */
2080     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2081     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2082     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2083     recv_rank = send_rank + size;
2084     np_subcomm = size/nsubcomm;
2085     nleftover  = size - nsubcomm*np_subcomm;
2086     nsends = 0; nrecvs = 0;
2087     for (i=0; i<size; i++){ /* i=rank*/
2088       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2089         send_rank[nsends] = i; nsends++;
2090         recv_rank[nrecvs++] = i;
2091       }
2092     }
2093     if (rank >= size - nleftover){/* this proc is a leftover processor */
2094       i = size-nleftover-1;
2095       j = 0;
2096       while (j < nsubcomm - nleftover){
2097         send_rank[nsends++] = i;
2098         i--; j++;
2099       }
2100     }
2101 
2102     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2103       for (i=0; i<nleftover; i++){
2104         recv_rank[nrecvs++] = size-nleftover+i;
2105       }
2106     }
2107 
2108     /* allocate sbuf_j, sbuf_a */
2109     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2110     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2111     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2112   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2113 
2114   /* copy mat's local entries into the buffers */
2115   if (reuse == MAT_INITIAL_MATRIX){
2116     rownz_max = 0;
2117     rptr = sbuf_j;
2118     cols = sbuf_j + rend-rstart + 1;
2119     vals = sbuf_a;
2120     rptr[0] = 0;
2121     for (i=0; i<rend-rstart; i++){
2122       row = i + rstart;
2123       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2124       ncols  = nzA + nzB;
2125       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2126       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2127       /* load the column indices for this row into cols */
2128       lwrite = 0;
2129       for (l=0; l<nzB; l++) {
2130         if ((ctmp = bmap[cworkB[l]]) < cstart){
2131           vals[lwrite]   = aworkB[l];
2132           cols[lwrite++] = ctmp;
2133         }
2134       }
2135       for (l=0; l<nzA; l++){
2136         vals[lwrite]   = aworkA[l];
2137         cols[lwrite++] = cstart + cworkA[l];
2138       }
2139       for (l=0; l<nzB; l++) {
2140         if ((ctmp = bmap[cworkB[l]]) >= cend){
2141           vals[lwrite]   = aworkB[l];
2142           cols[lwrite++] = ctmp;
2143         }
2144       }
2145       vals += ncols;
2146       cols += ncols;
2147       rptr[i+1] = rptr[i] + ncols;
2148       if (rownz_max < ncols) rownz_max = ncols;
2149     }
2150     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2151   } else { /* only copy matrix values into sbuf_a */
2152     rptr = sbuf_j;
2153     vals = sbuf_a;
2154     rptr[0] = 0;
2155     for (i=0; i<rend-rstart; i++){
2156       row = i + rstart;
2157       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2158       ncols  = nzA + nzB;
2159       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2160       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2161       lwrite = 0;
2162       for (l=0; l<nzB; l++) {
2163         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2164       }
2165       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2166       for (l=0; l<nzB; l++) {
2167         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2168       }
2169       vals += ncols;
2170       rptr[i+1] = rptr[i] + ncols;
2171     }
2172   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* send nzlocal to others, and recv other's nzlocal */
2175   /*--------------------------------------------------*/
2176   if (reuse == MAT_INITIAL_MATRIX){
2177     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2178     s_waits2 = s_waits3 + nsends;
2179     s_waits1 = s_waits2 + nsends;
2180     r_waits1 = s_waits1 + nsends;
2181     r_waits2 = r_waits1 + nrecvs;
2182     r_waits3 = r_waits2 + nrecvs;
2183   } else {
2184     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2185     r_waits3 = s_waits3 + nsends;
2186   }
2187 
2188   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2189   if (reuse == MAT_INITIAL_MATRIX){
2190     /* get new tags to keep the communication clean */
2191     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2192     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2193     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2194     rbuf_nz = sbuf_nz + nsends;
2195 
2196     /* post receives of other's nzlocal */
2197     for (i=0; i<nrecvs; i++){
2198       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2199     }
2200     /* send nzlocal to others */
2201     for (i=0; i<nsends; i++){
2202       sbuf_nz[i] = nzlocal;
2203       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2204     }
2205     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2206     count = nrecvs;
2207     while (count) {
2208       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2209       recv_rank[imdex] = recv_status.MPI_SOURCE;
2210       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2211       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2212 
2213       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2214       rbuf_nz[imdex] += i + 2;
2215       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2216       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2217       count--;
2218     }
2219     /* wait on sends of nzlocal */
2220     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2221     /* send mat->i,j to others, and recv from other's */
2222     /*------------------------------------------------*/
2223     for (i=0; i<nsends; i++){
2224       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2225       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2226     }
2227     /* wait on receives of mat->i,j */
2228     /*------------------------------*/
2229     count = nrecvs;
2230     while (count) {
2231       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2232       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2233       count--;
2234     }
2235     /* wait on sends of mat->i,j */
2236     /*---------------------------*/
2237     if (nsends) {
2238       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2239     }
2240   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2241 
2242   /* post receives, send and receive mat->a */
2243   /*----------------------------------------*/
2244   for (imdex=0; imdex<nrecvs; imdex++) {
2245     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2246   }
2247   for (i=0; i<nsends; i++){
2248     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2249   }
2250   count = nrecvs;
2251   while (count) {
2252     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2253     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2254     count--;
2255   }
2256   if (nsends) {
2257     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2258   }
2259 
2260   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2261 
2262   /* create redundant matrix */
2263   /*-------------------------*/
2264   if (reuse == MAT_INITIAL_MATRIX){
2265     /* compute rownz_max for preallocation */
2266     for (imdex=0; imdex<nrecvs; imdex++){
2267       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2268       rptr = rbuf_j[imdex];
2269       for (i=0; i<j; i++){
2270         ncols = rptr[i+1] - rptr[i];
2271         if (rownz_max < ncols) rownz_max = ncols;
2272       }
2273     }
2274 
2275     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2276     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2277     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2278     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2279     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2280   } else {
2281     C = *matredundant;
2282   }
2283 
2284   /* insert local matrix entries */
2285   rptr = sbuf_j;
2286   cols = sbuf_j + rend-rstart + 1;
2287   vals = sbuf_a;
2288   for (i=0; i<rend-rstart; i++){
2289     row   = i + rstart;
2290     ncols = rptr[i+1] - rptr[i];
2291     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2292     vals += ncols;
2293     cols += ncols;
2294   }
2295   /* insert received matrix entries */
2296   for (imdex=0; imdex<nrecvs; imdex++){
2297     rstart = rowrange[recv_rank[imdex]];
2298     rend   = rowrange[recv_rank[imdex]+1];
2299     rptr = rbuf_j[imdex];
2300     cols = rbuf_j[imdex] + rend-rstart + 1;
2301     vals = rbuf_a[imdex];
2302     for (i=0; i<rend-rstart; i++){
2303       row   = i + rstart;
2304       ncols = rptr[i+1] - rptr[i];
2305       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2306       vals += ncols;
2307       cols += ncols;
2308     }
2309   }
2310   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2311   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2313   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2314   if (reuse == MAT_INITIAL_MATRIX){
2315     PetscContainer container;
2316     *matredundant = C;
2317     /* create a supporting struct and attach it to C for reuse */
2318     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2319     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2320     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2321     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2322     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2323 
2324     redund->nzlocal = nzlocal;
2325     redund->nsends  = nsends;
2326     redund->nrecvs  = nrecvs;
2327     redund->send_rank = send_rank;
2328     redund->sbuf_nz = sbuf_nz;
2329     redund->sbuf_j  = sbuf_j;
2330     redund->sbuf_a  = sbuf_a;
2331     redund->rbuf_j  = rbuf_j;
2332     redund->rbuf_a  = rbuf_a;
2333 
2334     redund->MatDestroy = C->ops->destroy;
2335     C->ops->destroy    = MatDestroy_MatRedundant;
2336   }
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2342 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2343 {
2344   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2345   PetscInt       n      = A->rmap.n;
2346   PetscInt       cstart = A->cmap.rstart;
2347   PetscInt      *cmap   = mat->garray;
2348   PetscInt      *diagIdx, *offdiagIdx;
2349   Vec            diagV, offdiagV;
2350   PetscScalar   *a, *diagA, *offdiagA;
2351   PetscInt       r;
2352   PetscErrorCode ierr;
2353 
2354   PetscFunctionBegin;
2355   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2356   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2357   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2358   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2359   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2360   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2361   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2362   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2363   for(r = 0; r < n; ++r) {
2364     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2365       a[r]   = diagA[r];
2366       idx[r] = cstart + diagIdx[r];
2367     } else {
2368       a[r]   = offdiagA[r];
2369       idx[r] = cmap[offdiagIdx[r]];
2370     }
2371   }
2372   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2373   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2375   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2376   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2377   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2383 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2384 {
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 /* -------------------------------------------------------------------*/
2393 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2394        MatGetRow_MPIAIJ,
2395        MatRestoreRow_MPIAIJ,
2396        MatMult_MPIAIJ,
2397 /* 4*/ MatMultAdd_MPIAIJ,
2398        MatMultTranspose_MPIAIJ,
2399        MatMultTransposeAdd_MPIAIJ,
2400 #ifdef PETSC_HAVE_PBGL
2401        MatSolve_MPIAIJ,
2402 #else
2403        0,
2404 #endif
2405        0,
2406        0,
2407 /*10*/ 0,
2408        0,
2409        0,
2410        MatRelax_MPIAIJ,
2411        MatTranspose_MPIAIJ,
2412 /*15*/ MatGetInfo_MPIAIJ,
2413        MatEqual_MPIAIJ,
2414        MatGetDiagonal_MPIAIJ,
2415        MatDiagonalScale_MPIAIJ,
2416        MatNorm_MPIAIJ,
2417 /*20*/ MatAssemblyBegin_MPIAIJ,
2418        MatAssemblyEnd_MPIAIJ,
2419        0,
2420        MatSetOption_MPIAIJ,
2421        MatZeroEntries_MPIAIJ,
2422 /*25*/ MatZeroRows_MPIAIJ,
2423        0,
2424 #ifdef PETSC_HAVE_PBGL
2425        MatLUFactorNumeric_MPIAIJ,
2426 #else
2427        0,
2428 #endif
2429        0,
2430        0,
2431 /*30*/ MatSetUpPreallocation_MPIAIJ,
2432 #ifdef PETSC_HAVE_PBGL
2433        MatILUFactorSymbolic_MPIAIJ,
2434 #else
2435        0,
2436 #endif
2437        0,
2438        0,
2439        0,
2440 /*35*/ MatDuplicate_MPIAIJ,
2441        0,
2442        0,
2443        0,
2444        0,
2445 /*40*/ MatAXPY_MPIAIJ,
2446        MatGetSubMatrices_MPIAIJ,
2447        MatIncreaseOverlap_MPIAIJ,
2448        MatGetValues_MPIAIJ,
2449        MatCopy_MPIAIJ,
2450 /*45*/ 0,
2451        MatScale_MPIAIJ,
2452        0,
2453        0,
2454        0,
2455 /*50*/ MatSetBlockSize_MPIAIJ,
2456        0,
2457        0,
2458        0,
2459        0,
2460 /*55*/ MatFDColoringCreate_MPIAIJ,
2461        0,
2462        MatSetUnfactored_MPIAIJ,
2463        MatPermute_MPIAIJ,
2464        0,
2465 /*60*/ MatGetSubMatrix_MPIAIJ,
2466        MatDestroy_MPIAIJ,
2467        MatView_MPIAIJ,
2468        0,
2469        0,
2470 /*65*/ 0,
2471        0,
2472        0,
2473        0,
2474        0,
2475 /*70*/ 0,
2476        0,
2477        MatSetColoring_MPIAIJ,
2478 #if defined(PETSC_HAVE_ADIC)
2479        MatSetValuesAdic_MPIAIJ,
2480 #else
2481        0,
2482 #endif
2483        MatSetValuesAdifor_MPIAIJ,
2484 /*75*/ 0,
2485        0,
2486        0,
2487        0,
2488        0,
2489 /*80*/ 0,
2490        0,
2491        0,
2492        0,
2493 /*84*/ MatLoad_MPIAIJ,
2494        0,
2495        0,
2496        0,
2497        0,
2498        0,
2499 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2500        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2501        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2502        MatPtAP_Basic,
2503        MatPtAPSymbolic_MPIAIJ,
2504 /*95*/ MatPtAPNumeric_MPIAIJ,
2505        0,
2506        0,
2507        0,
2508        0,
2509 /*100*/0,
2510        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2511        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2512        MatConjugate_MPIAIJ,
2513        0,
2514 /*105*/MatSetValuesRow_MPIAIJ,
2515        MatRealPart_MPIAIJ,
2516        MatImaginaryPart_MPIAIJ,
2517        0,
2518        0,
2519 /*110*/0,
2520        MatGetRedundantMatrix_MPIAIJ,
2521        MatGetRowMin_MPIAIJ,
2522        0,
2523        0,
2524 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2525 
2526 /* ----------------------------------------------------------------------------------------*/
2527 
2528 EXTERN_C_BEGIN
2529 #undef __FUNCT__
2530 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2531 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2532 {
2533   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2534   PetscErrorCode ierr;
2535 
2536   PetscFunctionBegin;
2537   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2538   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 EXTERN_C_END
2542 
2543 EXTERN_C_BEGIN
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2546 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2547 {
2548   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2549   PetscErrorCode ierr;
2550 
2551   PetscFunctionBegin;
2552   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2553   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 EXTERN_C_END
2557 
2558 #include "petscpc.h"
2559 EXTERN_C_BEGIN
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2562 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2563 {
2564   Mat_MPIAIJ     *b;
2565   PetscErrorCode ierr;
2566   PetscInt       i;
2567 
2568   PetscFunctionBegin;
2569   B->preallocated = PETSC_TRUE;
2570   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2571   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2572   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2573   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2574 
2575   B->rmap.bs = B->cmap.bs = 1;
2576   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2577   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2578   if (d_nnz) {
2579     for (i=0; i<B->rmap.n; i++) {
2580       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2581     }
2582   }
2583   if (o_nnz) {
2584     for (i=0; i<B->rmap.n; i++) {
2585       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2586     }
2587   }
2588   b = (Mat_MPIAIJ*)B->data;
2589 
2590   /* Explicitly create 2 MATSEQAIJ matrices. */
2591   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2592   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2593   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2594   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2595   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2596   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2597   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2598   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2599 
2600   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2601   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2602 
2603   PetscFunctionReturn(0);
2604 }
2605 EXTERN_C_END
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2609 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2610 {
2611   Mat            mat;
2612   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2613   PetscErrorCode ierr;
2614 
2615   PetscFunctionBegin;
2616   *newmat       = 0;
2617   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2618   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2619   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2620   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2621   a    = (Mat_MPIAIJ*)mat->data;
2622 
2623   mat->factor       = matin->factor;
2624   mat->rmap.bs      = matin->rmap.bs;
2625   mat->assembled    = PETSC_TRUE;
2626   mat->insertmode   = NOT_SET_VALUES;
2627   mat->preallocated = PETSC_TRUE;
2628 
2629   a->size           = oldmat->size;
2630   a->rank           = oldmat->rank;
2631   a->donotstash     = oldmat->donotstash;
2632   a->roworiented    = oldmat->roworiented;
2633   a->rowindices     = 0;
2634   a->rowvalues      = 0;
2635   a->getrowactive   = PETSC_FALSE;
2636 
2637   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2638   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2639 
2640   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2641   if (oldmat->colmap) {
2642 #if defined (PETSC_USE_CTABLE)
2643     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2644 #else
2645     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2646     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2647     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2648 #endif
2649   } else a->colmap = 0;
2650   if (oldmat->garray) {
2651     PetscInt len;
2652     len  = oldmat->B->cmap.n;
2653     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2654     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2655     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2656   } else a->garray = 0;
2657 
2658   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2659   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2660   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2661   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2662   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2663   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2664   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2665   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2666   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2667   *newmat = mat;
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 #include "petscsys.h"
2672 
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatLoad_MPIAIJ"
2675 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2676 {
2677   Mat            A;
2678   PetscScalar    *vals,*svals;
2679   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2680   MPI_Status     status;
2681   PetscErrorCode ierr;
2682   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2683   PetscInt       i,nz,j,rstart,rend,mmax;
2684   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2685   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2686   PetscInt       cend,cstart,n,*rowners;
2687   int            fd;
2688 
2689   PetscFunctionBegin;
2690   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2691   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2692   if (!rank) {
2693     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2694     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2695     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2696   }
2697 
2698   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2699   M = header[1]; N = header[2];
2700   /* determine ownership of all rows */
2701   m    = M/size + ((M % size) > rank);
2702   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2703   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2704 
2705   /* First process needs enough room for process with most rows */
2706   if (!rank) {
2707     mmax       = rowners[1];
2708     for (i=2; i<size; i++) {
2709       mmax = PetscMax(mmax,rowners[i]);
2710     }
2711   } else mmax = m;
2712 
2713   rowners[0] = 0;
2714   for (i=2; i<=size; i++) {
2715     rowners[i] += rowners[i-1];
2716   }
2717   rstart = rowners[rank];
2718   rend   = rowners[rank+1];
2719 
2720   /* distribute row lengths to all processors */
2721   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2722   if (!rank) {
2723     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2724     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2725     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2726     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2727     for (j=0; j<m; j++) {
2728       procsnz[0] += ourlens[j];
2729     }
2730     for (i=1; i<size; i++) {
2731       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2732       /* calculate the number of nonzeros on each processor */
2733       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2734         procsnz[i] += rowlengths[j];
2735       }
2736       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2737     }
2738     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2739   } else {
2740     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2741   }
2742 
2743   if (!rank) {
2744     /* determine max buffer needed and allocate it */
2745     maxnz = 0;
2746     for (i=0; i<size; i++) {
2747       maxnz = PetscMax(maxnz,procsnz[i]);
2748     }
2749     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2750 
2751     /* read in my part of the matrix column indices  */
2752     nz   = procsnz[0];
2753     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2754     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2755 
2756     /* read in every one elses and ship off */
2757     for (i=1; i<size; i++) {
2758       nz   = procsnz[i];
2759       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2760       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2761     }
2762     ierr = PetscFree(cols);CHKERRQ(ierr);
2763   } else {
2764     /* determine buffer space needed for message */
2765     nz = 0;
2766     for (i=0; i<m; i++) {
2767       nz += ourlens[i];
2768     }
2769     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2770 
2771     /* receive message of column indices*/
2772     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2773     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2774     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2775   }
2776 
2777   /* determine column ownership if matrix is not square */
2778   if (N != M) {
2779     n      = N/size + ((N % size) > rank);
2780     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2781     cstart = cend - n;
2782   } else {
2783     cstart = rstart;
2784     cend   = rend;
2785     n      = cend - cstart;
2786   }
2787 
2788   /* loop over local rows, determining number of off diagonal entries */
2789   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2790   jj = 0;
2791   for (i=0; i<m; i++) {
2792     for (j=0; j<ourlens[i]; j++) {
2793       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2794       jj++;
2795     }
2796   }
2797 
2798   /* create our matrix */
2799   for (i=0; i<m; i++) {
2800     ourlens[i] -= offlens[i];
2801   }
2802   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2803   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2804   ierr = MatSetType(A,type);CHKERRQ(ierr);
2805   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2806 
2807   for (i=0; i<m; i++) {
2808     ourlens[i] += offlens[i];
2809   }
2810 
2811   if (!rank) {
2812     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2813 
2814     /* read in my part of the matrix numerical values  */
2815     nz   = procsnz[0];
2816     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2817 
2818     /* insert into matrix */
2819     jj      = rstart;
2820     smycols = mycols;
2821     svals   = vals;
2822     for (i=0; i<m; i++) {
2823       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2824       smycols += ourlens[i];
2825       svals   += ourlens[i];
2826       jj++;
2827     }
2828 
2829     /* read in other processors and ship out */
2830     for (i=1; i<size; i++) {
2831       nz   = procsnz[i];
2832       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2833       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2834     }
2835     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2836   } else {
2837     /* receive numeric values */
2838     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2839 
2840     /* receive message of values*/
2841     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2842     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2843     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2844 
2845     /* insert into matrix */
2846     jj      = rstart;
2847     smycols = mycols;
2848     svals   = vals;
2849     for (i=0; i<m; i++) {
2850       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2851       smycols += ourlens[i];
2852       svals   += ourlens[i];
2853       jj++;
2854     }
2855   }
2856   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2857   ierr = PetscFree(vals);CHKERRQ(ierr);
2858   ierr = PetscFree(mycols);CHKERRQ(ierr);
2859   ierr = PetscFree(rowners);CHKERRQ(ierr);
2860 
2861   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2863   *newmat = A;
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2869 /*
2870     Not great since it makes two copies of the submatrix, first an SeqAIJ
2871   in local and then by concatenating the local matrices the end result.
2872   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2873 */
2874 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2875 {
2876   PetscErrorCode ierr;
2877   PetscMPIInt    rank,size;
2878   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2879   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2880   Mat            *local,M,Mreuse;
2881   MatScalar      *vwork,*aa;
2882   MPI_Comm       comm = ((PetscObject)mat)->comm;
2883   Mat_SeqAIJ     *aij;
2884 
2885 
2886   PetscFunctionBegin;
2887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2889 
2890   if (call ==  MAT_REUSE_MATRIX) {
2891     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2892     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2893     local = &Mreuse;
2894     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2895   } else {
2896     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2897     Mreuse = *local;
2898     ierr   = PetscFree(local);CHKERRQ(ierr);
2899   }
2900 
2901   /*
2902       m - number of local rows
2903       n - number of columns (same on all processors)
2904       rstart - first row in new global matrix generated
2905   */
2906   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2907   if (call == MAT_INITIAL_MATRIX) {
2908     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2909     ii  = aij->i;
2910     jj  = aij->j;
2911 
2912     /*
2913         Determine the number of non-zeros in the diagonal and off-diagonal
2914         portions of the matrix in order to do correct preallocation
2915     */
2916 
2917     /* first get start and end of "diagonal" columns */
2918     if (csize == PETSC_DECIDE) {
2919       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2920       if (mglobal == n) { /* square matrix */
2921 	nlocal = m;
2922       } else {
2923         nlocal = n/size + ((n % size) > rank);
2924       }
2925     } else {
2926       nlocal = csize;
2927     }
2928     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2929     rstart = rend - nlocal;
2930     if (rank == size - 1 && rend != n) {
2931       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2932     }
2933 
2934     /* next, compute all the lengths */
2935     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2936     olens = dlens + m;
2937     for (i=0; i<m; i++) {
2938       jend = ii[i+1] - ii[i];
2939       olen = 0;
2940       dlen = 0;
2941       for (j=0; j<jend; j++) {
2942         if (*jj < rstart || *jj >= rend) olen++;
2943         else dlen++;
2944         jj++;
2945       }
2946       olens[i] = olen;
2947       dlens[i] = dlen;
2948     }
2949     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2950     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2951     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2952     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2953     ierr = PetscFree(dlens);CHKERRQ(ierr);
2954   } else {
2955     PetscInt ml,nl;
2956 
2957     M = *newmat;
2958     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2959     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2960     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2961     /*
2962          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2963        rather than the slower MatSetValues().
2964     */
2965     M->was_assembled = PETSC_TRUE;
2966     M->assembled     = PETSC_FALSE;
2967   }
2968   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2969   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2970   ii  = aij->i;
2971   jj  = aij->j;
2972   aa  = aij->a;
2973   for (i=0; i<m; i++) {
2974     row   = rstart + i;
2975     nz    = ii[i+1] - ii[i];
2976     cwork = jj;     jj += nz;
2977     vwork = aa;     aa += nz;
2978     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2979   }
2980 
2981   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2982   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   *newmat = M;
2984 
2985   /* save submatrix used in processor for next request */
2986   if (call ==  MAT_INITIAL_MATRIX) {
2987     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2988     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2989   }
2990 
2991   PetscFunctionReturn(0);
2992 }
2993 
2994 EXTERN_C_BEGIN
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2997 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2998 {
2999   PetscInt       m,cstart, cend,j,nnz,i,d;
3000   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3001   const PetscInt *JJ;
3002   PetscScalar    *values;
3003   PetscErrorCode ierr;
3004 
3005   PetscFunctionBegin;
3006   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3007 
3008   B->rmap.bs = B->cmap.bs = 1;
3009   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
3010   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
3011   m      = B->rmap.n;
3012   cstart = B->cmap.rstart;
3013   cend   = B->cmap.rend;
3014   rstart = B->rmap.rstart;
3015 
3016   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3017   o_nnz = d_nnz + m;
3018 
3019 #if defined(PETSC_USE_DEBUGGING)
3020   for (i=0; i<m; i++) {
3021     nnz     = Ii[i+1]- Ii[i];
3022     JJ      = J + Ii[i];
3023     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3024     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3025     if (nnz && (JJ[nnz-1] >= B->cmap.N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
3026     for (j=1; j<nnz; j++) {
3027       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3028     }
3029   }
3030 #endif
3031 
3032   for (i=0; i<m; i++) {
3033     nnz     = Ii[i+1]- Ii[i];
3034     JJ      = J + Ii[i];
3035     nnz_max = PetscMax(nnz_max,nnz);
3036     for (j=0; j<nnz; j++) {
3037       if (*JJ >= cstart) break;
3038       JJ++;
3039     }
3040     d = 0;
3041     for (; j<nnz; j++) {
3042       if (*JJ++ >= cend) break;
3043       d++;
3044     }
3045     d_nnz[i] = d;
3046     o_nnz[i] = nnz - d;
3047   }
3048   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3049   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3050 
3051   if (v) values = (PetscScalar*)v;
3052   else {
3053     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3054     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3055   }
3056 
3057   for (i=0; i<m; i++) {
3058     ii   = i + rstart;
3059     nnz  = Ii[i+1]- Ii[i];
3060     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3061   }
3062   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3063   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3064 
3065   if (!v) {
3066     ierr = PetscFree(values);CHKERRQ(ierr);
3067   }
3068   PetscFunctionReturn(0);
3069 }
3070 EXTERN_C_END
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3074 /*@
3075    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3076    (the default parallel PETSc format).
3077 
3078    Collective on MPI_Comm
3079 
3080    Input Parameters:
3081 +  B - the matrix
3082 .  i - the indices into j for the start of each local row (starts with zero)
3083 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3084 -  v - optional values in the matrix
3085 
3086    Level: developer
3087 
3088    Notes:
3089        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3090      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3091      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3092 
3093        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3094 
3095        The format which is used for the sparse matrix input, is equivalent to a
3096     row-major ordering.. i.e for the following matrix, the input data expected is
3097     as shown:
3098 
3099         1 0 0
3100         2 0 3     P0
3101        -------
3102         4 5 6     P1
3103 
3104      Process0 [P0]: rows_owned=[0,1]
3105         i =  {0,1,3}  [size = nrow+1  = 2+1]
3106         j =  {0,0,2}  [size = nz = 6]
3107         v =  {1,2,3}  [size = nz = 6]
3108 
3109      Process1 [P1]: rows_owned=[2]
3110         i =  {0,3}    [size = nrow+1  = 1+1]
3111         j =  {0,1,2}  [size = nz = 6]
3112         v =  {4,5,6}  [size = nz = 6]
3113 
3114       The column indices for each row MUST be sorted.
3115 
3116 .keywords: matrix, aij, compressed row, sparse, parallel
3117 
3118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3119           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3120 @*/
3121 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3122 {
3123   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3124 
3125   PetscFunctionBegin;
3126   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3127   if (f) {
3128     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3129   }
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 #undef __FUNCT__
3134 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3135 /*@C
3136    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3137    (the default parallel PETSc format).  For good matrix assembly performance
3138    the user should preallocate the matrix storage by setting the parameters
3139    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3140    performance can be increased by more than a factor of 50.
3141 
3142    Collective on MPI_Comm
3143 
3144    Input Parameters:
3145 +  A - the matrix
3146 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3147            (same value is used for all local rows)
3148 .  d_nnz - array containing the number of nonzeros in the various rows of the
3149            DIAGONAL portion of the local submatrix (possibly different for each row)
3150            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3151            The size of this array is equal to the number of local rows, i.e 'm'.
3152            You must leave room for the diagonal entry even if it is zero.
3153 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3154            submatrix (same value is used for all local rows).
3155 -  o_nnz - array containing the number of nonzeros in the various rows of the
3156            OFF-DIAGONAL portion of the local submatrix (possibly different for
3157            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3158            structure. The size of this array is equal to the number
3159            of local rows, i.e 'm'.
3160 
3161    If the *_nnz parameter is given then the *_nz parameter is ignored
3162 
3163    The AIJ format (also called the Yale sparse matrix format or
3164    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3165    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3166 
3167    The parallel matrix is partitioned such that the first m0 rows belong to
3168    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3169    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3170 
3171    The DIAGONAL portion of the local submatrix of a processor can be defined
3172    as the submatrix which is obtained by extraction the part corresponding
3173    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3174    first row that belongs to the processor, and r2 is the last row belonging
3175    to the this processor. This is a square mxm matrix. The remaining portion
3176    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3177 
3178    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3179 
3180    You can call MatGetInfo() to get information on how effective the preallocation was;
3181    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3182    You can also run with the option -info and look for messages with the string
3183    malloc in them to see if additional memory allocation was needed.
3184 
3185    Example usage:
3186 
3187    Consider the following 8x8 matrix with 34 non-zero values, that is
3188    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3189    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3190    as follows:
3191 
3192 .vb
3193             1  2  0  |  0  3  0  |  0  4
3194     Proc0   0  5  6  |  7  0  0  |  8  0
3195             9  0 10  | 11  0  0  | 12  0
3196     -------------------------------------
3197            13  0 14  | 15 16 17  |  0  0
3198     Proc1   0 18  0  | 19 20 21  |  0  0
3199             0  0  0  | 22 23  0  | 24  0
3200     -------------------------------------
3201     Proc2  25 26 27  |  0  0 28  | 29  0
3202            30  0  0  | 31 32 33  |  0 34
3203 .ve
3204 
3205    This can be represented as a collection of submatrices as:
3206 
3207 .vb
3208       A B C
3209       D E F
3210       G H I
3211 .ve
3212 
3213    Where the submatrices A,B,C are owned by proc0, D,E,F are
3214    owned by proc1, G,H,I are owned by proc2.
3215 
3216    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3217    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3218    The 'M','N' parameters are 8,8, and have the same values on all procs.
3219 
3220    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3221    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3222    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3223    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3224    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3225    matrix, ans [DF] as another SeqAIJ matrix.
3226 
3227    When d_nz, o_nz parameters are specified, d_nz storage elements are
3228    allocated for every row of the local diagonal submatrix, and o_nz
3229    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3230    One way to choose d_nz and o_nz is to use the max nonzerors per local
3231    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3232    In this case, the values of d_nz,o_nz are:
3233 .vb
3234      proc0 : dnz = 2, o_nz = 2
3235      proc1 : dnz = 3, o_nz = 2
3236      proc2 : dnz = 1, o_nz = 4
3237 .ve
3238    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3239    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3240    for proc3. i.e we are using 12+15+10=37 storage locations to store
3241    34 values.
3242 
3243    When d_nnz, o_nnz parameters are specified, the storage is specified
3244    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3245    In the above case the values for d_nnz,o_nnz are:
3246 .vb
3247      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3248      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3249      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3250 .ve
3251    Here the space allocated is sum of all the above values i.e 34, and
3252    hence pre-allocation is perfect.
3253 
3254    Level: intermediate
3255 
3256 .keywords: matrix, aij, compressed row, sparse, parallel
3257 
3258 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3259           MPIAIJ, MatGetInfo()
3260 @*/
3261 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3262 {
3263   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3264 
3265   PetscFunctionBegin;
3266   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3267   if (f) {
3268     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3269   }
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3275 /*@
3276      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3277          CSR format the local rows.
3278 
3279    Collective on MPI_Comm
3280 
3281    Input Parameters:
3282 +  comm - MPI communicator
3283 .  m - number of local rows (Cannot be PETSC_DECIDE)
3284 .  n - This value should be the same as the local size used in creating the
3285        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3286        calculated if N is given) For square matrices n is almost always m.
3287 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3288 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3289 .   i - row indices
3290 .   j - column indices
3291 -   a - matrix values
3292 
3293    Output Parameter:
3294 .   mat - the matrix
3295 
3296    Level: intermediate
3297 
3298    Notes:
3299        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3300      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3301      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3302 
3303        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3304 
3305        The format which is used for the sparse matrix input, is equivalent to a
3306     row-major ordering.. i.e for the following matrix, the input data expected is
3307     as shown:
3308 
3309         1 0 0
3310         2 0 3     P0
3311        -------
3312         4 5 6     P1
3313 
3314      Process0 [P0]: rows_owned=[0,1]
3315         i =  {0,1,3}  [size = nrow+1  = 2+1]
3316         j =  {0,0,2}  [size = nz = 6]
3317         v =  {1,2,3}  [size = nz = 6]
3318 
3319      Process1 [P1]: rows_owned=[2]
3320         i =  {0,3}    [size = nrow+1  = 1+1]
3321         j =  {0,1,2}  [size = nz = 6]
3322         v =  {4,5,6}  [size = nz = 6]
3323 
3324 .keywords: matrix, aij, compressed row, sparse, parallel
3325 
3326 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3327           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3328 @*/
3329 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3330 {
3331   PetscErrorCode ierr;
3332 
3333  PetscFunctionBegin;
3334   if (i[0]) {
3335     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3336   }
3337   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3338   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3339   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3340   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3341   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 #undef __FUNCT__
3346 #define __FUNCT__ "MatCreateMPIAIJ"
3347 /*@C
3348    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3349    (the default parallel PETSc format).  For good matrix assembly performance
3350    the user should preallocate the matrix storage by setting the parameters
3351    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3352    performance can be increased by more than a factor of 50.
3353 
3354    Collective on MPI_Comm
3355 
3356    Input Parameters:
3357 +  comm - MPI communicator
3358 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3359            This value should be the same as the local size used in creating the
3360            y vector for the matrix-vector product y = Ax.
3361 .  n - This value should be the same as the local size used in creating the
3362        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3363        calculated if N is given) For square matrices n is almost always m.
3364 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3365 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3366 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3367            (same value is used for all local rows)
3368 .  d_nnz - array containing the number of nonzeros in the various rows of the
3369            DIAGONAL portion of the local submatrix (possibly different for each row)
3370            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3371            The size of this array is equal to the number of local rows, i.e 'm'.
3372            You must leave room for the diagonal entry even if it is zero.
3373 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3374            submatrix (same value is used for all local rows).
3375 -  o_nnz - array containing the number of nonzeros in the various rows of the
3376            OFF-DIAGONAL portion of the local submatrix (possibly different for
3377            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3378            structure. The size of this array is equal to the number
3379            of local rows, i.e 'm'.
3380 
3381    Output Parameter:
3382 .  A - the matrix
3383 
3384    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3385    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3386    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3387    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3388 
3389    Notes:
3390    If the *_nnz parameter is given then the *_nz parameter is ignored
3391 
3392    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3393    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3394    storage requirements for this matrix.
3395 
3396    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3397    processor than it must be used on all processors that share the object for
3398    that argument.
3399 
3400    The user MUST specify either the local or global matrix dimensions
3401    (possibly both).
3402 
3403    The parallel matrix is partitioned across processors such that the
3404    first m0 rows belong to process 0, the next m1 rows belong to
3405    process 1, the next m2 rows belong to process 2 etc.. where
3406    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3407    values corresponding to [m x N] submatrix.
3408 
3409    The columns are logically partitioned with the n0 columns belonging
3410    to 0th partition, the next n1 columns belonging to the next
3411    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3412 
3413    The DIAGONAL portion of the local submatrix on any given processor
3414    is the submatrix corresponding to the rows and columns m,n
3415    corresponding to the given processor. i.e diagonal matrix on
3416    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3417    etc. The remaining portion of the local submatrix [m x (N-n)]
3418    constitute the OFF-DIAGONAL portion. The example below better
3419    illustrates this concept.
3420 
3421    For a square global matrix we define each processor's diagonal portion
3422    to be its local rows and the corresponding columns (a square submatrix);
3423    each processor's off-diagonal portion encompasses the remainder of the
3424    local matrix (a rectangular submatrix).
3425 
3426    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3427 
3428    When calling this routine with a single process communicator, a matrix of
3429    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3430    type of communicator, use the construction mechanism:
3431      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3432 
3433    By default, this format uses inodes (identical nodes) when possible.
3434    We search for consecutive rows with the same nonzero structure, thereby
3435    reusing matrix information to achieve increased efficiency.
3436 
3437    Options Database Keys:
3438 +  -mat_no_inode  - Do not use inodes
3439 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3440 -  -mat_aij_oneindex - Internally use indexing starting at 1
3441         rather than 0.  Note that when calling MatSetValues(),
3442         the user still MUST index entries starting at 0!
3443 
3444 
3445    Example usage:
3446 
3447    Consider the following 8x8 matrix with 34 non-zero values, that is
3448    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3449    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3450    as follows:
3451 
3452 .vb
3453             1  2  0  |  0  3  0  |  0  4
3454     Proc0   0  5  6  |  7  0  0  |  8  0
3455             9  0 10  | 11  0  0  | 12  0
3456     -------------------------------------
3457            13  0 14  | 15 16 17  |  0  0
3458     Proc1   0 18  0  | 19 20 21  |  0  0
3459             0  0  0  | 22 23  0  | 24  0
3460     -------------------------------------
3461     Proc2  25 26 27  |  0  0 28  | 29  0
3462            30  0  0  | 31 32 33  |  0 34
3463 .ve
3464 
3465    This can be represented as a collection of submatrices as:
3466 
3467 .vb
3468       A B C
3469       D E F
3470       G H I
3471 .ve
3472 
3473    Where the submatrices A,B,C are owned by proc0, D,E,F are
3474    owned by proc1, G,H,I are owned by proc2.
3475 
3476    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3477    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3478    The 'M','N' parameters are 8,8, and have the same values on all procs.
3479 
3480    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3481    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3482    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3483    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3484    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3485    matrix, ans [DF] as another SeqAIJ matrix.
3486 
3487    When d_nz, o_nz parameters are specified, d_nz storage elements are
3488    allocated for every row of the local diagonal submatrix, and o_nz
3489    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3490    One way to choose d_nz and o_nz is to use the max nonzerors per local
3491    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3492    In this case, the values of d_nz,o_nz are:
3493 .vb
3494      proc0 : dnz = 2, o_nz = 2
3495      proc1 : dnz = 3, o_nz = 2
3496      proc2 : dnz = 1, o_nz = 4
3497 .ve
3498    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3499    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3500    for proc3. i.e we are using 12+15+10=37 storage locations to store
3501    34 values.
3502 
3503    When d_nnz, o_nnz parameters are specified, the storage is specified
3504    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3505    In the above case the values for d_nnz,o_nnz are:
3506 .vb
3507      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3508      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3509      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3510 .ve
3511    Here the space allocated is sum of all the above values i.e 34, and
3512    hence pre-allocation is perfect.
3513 
3514    Level: intermediate
3515 
3516 .keywords: matrix, aij, compressed row, sparse, parallel
3517 
3518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3519           MPIAIJ, MatCreateMPIAIJWithArrays()
3520 @*/
3521 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3522 {
3523   PetscErrorCode ierr;
3524   PetscMPIInt    size;
3525 
3526   PetscFunctionBegin;
3527   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3528   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530   if (size > 1) {
3531     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3532     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3533   } else {
3534     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3535     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3536   }
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 #undef __FUNCT__
3541 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3542 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3543 {
3544   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3545 
3546   PetscFunctionBegin;
3547   *Ad     = a->A;
3548   *Ao     = a->B;
3549   *colmap = a->garray;
3550   PetscFunctionReturn(0);
3551 }
3552 
3553 #undef __FUNCT__
3554 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3555 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3556 {
3557   PetscErrorCode ierr;
3558   PetscInt       i;
3559   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3560 
3561   PetscFunctionBegin;
3562   if (coloring->ctype == IS_COLORING_GLOBAL) {
3563     ISColoringValue *allcolors,*colors;
3564     ISColoring      ocoloring;
3565 
3566     /* set coloring for diagonal portion */
3567     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3568 
3569     /* set coloring for off-diagonal portion */
3570     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3571     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3572     for (i=0; i<a->B->cmap.n; i++) {
3573       colors[i] = allcolors[a->garray[i]];
3574     }
3575     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3576     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3577     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3578     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3579   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3580     ISColoringValue *colors;
3581     PetscInt        *larray;
3582     ISColoring      ocoloring;
3583 
3584     /* set coloring for diagonal portion */
3585     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3586     for (i=0; i<a->A->cmap.n; i++) {
3587       larray[i] = i + A->cmap.rstart;
3588     }
3589     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3590     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3591     for (i=0; i<a->A->cmap.n; i++) {
3592       colors[i] = coloring->colors[larray[i]];
3593     }
3594     ierr = PetscFree(larray);CHKERRQ(ierr);
3595     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3596     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3597     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3598 
3599     /* set coloring for off-diagonal portion */
3600     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3601     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3602     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3603     for (i=0; i<a->B->cmap.n; i++) {
3604       colors[i] = coloring->colors[larray[i]];
3605     }
3606     ierr = PetscFree(larray);CHKERRQ(ierr);
3607     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3608     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3609     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3610   } else {
3611     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3612   }
3613 
3614   PetscFunctionReturn(0);
3615 }
3616 
3617 #if defined(PETSC_HAVE_ADIC)
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3620 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3621 {
3622   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3623   PetscErrorCode ierr;
3624 
3625   PetscFunctionBegin;
3626   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3627   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3628   PetscFunctionReturn(0);
3629 }
3630 #endif
3631 
3632 #undef __FUNCT__
3633 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3634 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3635 {
3636   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3637   PetscErrorCode ierr;
3638 
3639   PetscFunctionBegin;
3640   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3641   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3642   PetscFunctionReturn(0);
3643 }
3644 
3645 #undef __FUNCT__
3646 #define __FUNCT__ "MatMerge"
3647 /*@
3648       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3649                  matrices from each processor
3650 
3651     Collective on MPI_Comm
3652 
3653    Input Parameters:
3654 +    comm - the communicators the parallel matrix will live on
3655 .    inmat - the input sequential matrices
3656 .    n - number of local columns (or PETSC_DECIDE)
3657 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3658 
3659    Output Parameter:
3660 .    outmat - the parallel matrix generated
3661 
3662     Level: advanced
3663 
3664    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3665 
3666 @*/
3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3668 {
3669   PetscErrorCode ierr;
3670   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3671   PetscInt       *indx;
3672   PetscScalar    *values;
3673 
3674   PetscFunctionBegin;
3675   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3676   if (scall == MAT_INITIAL_MATRIX){
3677     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3678     if (n == PETSC_DECIDE){
3679       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3680     }
3681     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3682     rstart -= m;
3683 
3684     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3685     for (i=0;i<m;i++) {
3686       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3687       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3688       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3689     }
3690     /* This routine will ONLY return MPIAIJ type matrix */
3691     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3692     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3693     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3694     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3695     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3696 
3697   } else if (scall == MAT_REUSE_MATRIX){
3698     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3699   } else {
3700     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3701   }
3702 
3703   for (i=0;i<m;i++) {
3704     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3705     Ii    = i + rstart;
3706     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3707     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3708   }
3709   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3710   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712 
3713   PetscFunctionReturn(0);
3714 }
3715 
3716 #undef __FUNCT__
3717 #define __FUNCT__ "MatFileSplit"
3718 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3719 {
3720   PetscErrorCode    ierr;
3721   PetscMPIInt       rank;
3722   PetscInt          m,N,i,rstart,nnz;
3723   size_t            len;
3724   const PetscInt    *indx;
3725   PetscViewer       out;
3726   char              *name;
3727   Mat               B;
3728   const PetscScalar *values;
3729 
3730   PetscFunctionBegin;
3731   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3732   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3733   /* Should this be the type of the diagonal block of A? */
3734   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3735   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3736   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3737   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3738   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3739   for (i=0;i<m;i++) {
3740     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3741     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3742     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3743   }
3744   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3745   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3746 
3747   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3748   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3749   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3750   sprintf(name,"%s.%d",outfile,rank);
3751   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3752   ierr = PetscFree(name);
3753   ierr = MatView(B,out);CHKERRQ(ierr);
3754   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3755   ierr = MatDestroy(B);CHKERRQ(ierr);
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3762 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3763 {
3764   PetscErrorCode       ierr;
3765   Mat_Merge_SeqsToMPI  *merge;
3766   PetscContainer       container;
3767 
3768   PetscFunctionBegin;
3769   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3770   if (container) {
3771     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3772     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3773     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3774     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3775     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3776     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3777     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3778     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3779     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3780     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3781     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3782     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3783 
3784     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3785     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3786   }
3787   ierr = PetscFree(merge);CHKERRQ(ierr);
3788 
3789   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3790   PetscFunctionReturn(0);
3791 }
3792 
3793 #include "src/mat/utils/freespace.h"
3794 #include "petscbt.h"
3795 
3796 #undef __FUNCT__
3797 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3798 /*@C
3799       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3800                  matrices from each processor
3801 
3802     Collective on MPI_Comm
3803 
3804    Input Parameters:
3805 +    comm - the communicators the parallel matrix will live on
3806 .    seqmat - the input sequential matrices
3807 .    m - number of local rows (or PETSC_DECIDE)
3808 .    n - number of local columns (or PETSC_DECIDE)
3809 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3810 
3811    Output Parameter:
3812 .    mpimat - the parallel matrix generated
3813 
3814     Level: advanced
3815 
3816    Notes:
3817      The dimensions of the sequential matrix in each processor MUST be the same.
3818      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3819      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3820 @*/
3821 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3822 {
3823   PetscErrorCode       ierr;
3824   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3825   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3826   PetscMPIInt          size,rank,taga,*len_s;
3827   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3828   PetscInt             proc,m;
3829   PetscInt             **buf_ri,**buf_rj;
3830   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3831   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3832   MPI_Request          *s_waits,*r_waits;
3833   MPI_Status           *status;
3834   MatScalar            *aa=a->a;
3835   MatScalar            **abuf_r,*ba_i;
3836   Mat_Merge_SeqsToMPI  *merge;
3837   PetscContainer       container;
3838 
3839   PetscFunctionBegin;
3840   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3841 
3842   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3843   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3844 
3845   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3846   if (container) {
3847     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3848   }
3849   bi     = merge->bi;
3850   bj     = merge->bj;
3851   buf_ri = merge->buf_ri;
3852   buf_rj = merge->buf_rj;
3853 
3854   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3855   owners = merge->rowmap.range;
3856   len_s  = merge->len_s;
3857 
3858   /* send and recv matrix values */
3859   /*-----------------------------*/
3860   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3861   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3862 
3863   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3864   for (proc=0,k=0; proc<size; proc++){
3865     if (!len_s[proc]) continue;
3866     i = owners[proc];
3867     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3868     k++;
3869   }
3870 
3871   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3872   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3873   ierr = PetscFree(status);CHKERRQ(ierr);
3874 
3875   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3876   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3877 
3878   /* insert mat values of mpimat */
3879   /*----------------------------*/
3880   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3881   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3882   nextrow = buf_ri_k + merge->nrecv;
3883   nextai  = nextrow + merge->nrecv;
3884 
3885   for (k=0; k<merge->nrecv; k++){
3886     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3887     nrows = *(buf_ri_k[k]);
3888     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3889     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3890   }
3891 
3892   /* set values of ba */
3893   m = merge->rowmap.n;
3894   for (i=0; i<m; i++) {
3895     arow = owners[rank] + i;
3896     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3897     bnzi = bi[i+1] - bi[i];
3898     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3899 
3900     /* add local non-zero vals of this proc's seqmat into ba */
3901     anzi = ai[arow+1] - ai[arow];
3902     aj   = a->j + ai[arow];
3903     aa   = a->a + ai[arow];
3904     nextaj = 0;
3905     for (j=0; nextaj<anzi; j++){
3906       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3907         ba_i[j] += aa[nextaj++];
3908       }
3909     }
3910 
3911     /* add received vals into ba */
3912     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3913       /* i-th row */
3914       if (i == *nextrow[k]) {
3915         anzi = *(nextai[k]+1) - *nextai[k];
3916         aj   = buf_rj[k] + *(nextai[k]);
3917         aa   = abuf_r[k] + *(nextai[k]);
3918         nextaj = 0;
3919         for (j=0; nextaj<anzi; j++){
3920           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3921             ba_i[j] += aa[nextaj++];
3922           }
3923         }
3924         nextrow[k]++; nextai[k]++;
3925       }
3926     }
3927     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3928   }
3929   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931 
3932   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3933   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3934   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3935   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3936   PetscFunctionReturn(0);
3937 }
3938 
3939 #undef __FUNCT__
3940 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3941 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3942 {
3943   PetscErrorCode       ierr;
3944   Mat                  B_mpi;
3945   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3946   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3947   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3948   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3949   PetscInt             len,proc,*dnz,*onz;
3950   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3951   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3952   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3953   MPI_Status           *status;
3954   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3955   PetscBT              lnkbt;
3956   Mat_Merge_SeqsToMPI  *merge;
3957   PetscContainer       container;
3958 
3959   PetscFunctionBegin;
3960   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3961 
3962   /* make sure it is a PETSc comm */
3963   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3964   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3965   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3966 
3967   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3968   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3969 
3970   /* determine row ownership */
3971   /*---------------------------------------------------------*/
3972   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3973   merge->rowmap.n = m;
3974   merge->rowmap.N = M;
3975   merge->rowmap.bs = 1;
3976   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3977   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3978   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3979 
3980   m      = merge->rowmap.n;
3981   M      = merge->rowmap.N;
3982   owners = merge->rowmap.range;
3983 
3984   /* determine the number of messages to send, their lengths */
3985   /*---------------------------------------------------------*/
3986   len_s  = merge->len_s;
3987 
3988   len = 0;  /* length of buf_si[] */
3989   merge->nsend = 0;
3990   for (proc=0; proc<size; proc++){
3991     len_si[proc] = 0;
3992     if (proc == rank){
3993       len_s[proc] = 0;
3994     } else {
3995       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3996       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3997     }
3998     if (len_s[proc]) {
3999       merge->nsend++;
4000       nrows = 0;
4001       for (i=owners[proc]; i<owners[proc+1]; i++){
4002         if (ai[i+1] > ai[i]) nrows++;
4003       }
4004       len_si[proc] = 2*(nrows+1);
4005       len += len_si[proc];
4006     }
4007   }
4008 
4009   /* determine the number and length of messages to receive for ij-structure */
4010   /*-------------------------------------------------------------------------*/
4011   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4012   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4013 
4014   /* post the Irecv of j-structure */
4015   /*-------------------------------*/
4016   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4017   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4018 
4019   /* post the Isend of j-structure */
4020   /*--------------------------------*/
4021   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4022   sj_waits = si_waits + merge->nsend;
4023 
4024   for (proc=0, k=0; proc<size; proc++){
4025     if (!len_s[proc]) continue;
4026     i = owners[proc];
4027     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4028     k++;
4029   }
4030 
4031   /* receives and sends of j-structure are complete */
4032   /*------------------------------------------------*/
4033   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4034   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4035 
4036   /* send and recv i-structure */
4037   /*---------------------------*/
4038   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4039   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4040 
4041   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4042   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4043   for (proc=0,k=0; proc<size; proc++){
4044     if (!len_s[proc]) continue;
4045     /* form outgoing message for i-structure:
4046          buf_si[0]:                 nrows to be sent
4047                [1:nrows]:           row index (global)
4048                [nrows+1:2*nrows+1]: i-structure index
4049     */
4050     /*-------------------------------------------*/
4051     nrows = len_si[proc]/2 - 1;
4052     buf_si_i    = buf_si + nrows+1;
4053     buf_si[0]   = nrows;
4054     buf_si_i[0] = 0;
4055     nrows = 0;
4056     for (i=owners[proc]; i<owners[proc+1]; i++){
4057       anzi = ai[i+1] - ai[i];
4058       if (anzi) {
4059         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4060         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4061         nrows++;
4062       }
4063     }
4064     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4065     k++;
4066     buf_si += len_si[proc];
4067   }
4068 
4069   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4070   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4071 
4072   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4073   for (i=0; i<merge->nrecv; i++){
4074     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4075   }
4076 
4077   ierr = PetscFree(len_si);CHKERRQ(ierr);
4078   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4079   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4080   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4081   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4082   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4083   ierr = PetscFree(status);CHKERRQ(ierr);
4084 
4085   /* compute a local seq matrix in each processor */
4086   /*----------------------------------------------*/
4087   /* allocate bi array and free space for accumulating nonzero column info */
4088   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4089   bi[0] = 0;
4090 
4091   /* create and initialize a linked list */
4092   nlnk = N+1;
4093   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4094 
4095   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4096   len = 0;
4097   len  = ai[owners[rank+1]] - ai[owners[rank]];
4098   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4099   current_space = free_space;
4100 
4101   /* determine symbolic info for each local row */
4102   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4103   nextrow = buf_ri_k + merge->nrecv;
4104   nextai  = nextrow + merge->nrecv;
4105   for (k=0; k<merge->nrecv; k++){
4106     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4107     nrows = *buf_ri_k[k];
4108     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4109     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4110   }
4111 
4112   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4113   len = 0;
4114   for (i=0;i<m;i++) {
4115     bnzi   = 0;
4116     /* add local non-zero cols of this proc's seqmat into lnk */
4117     arow   = owners[rank] + i;
4118     anzi   = ai[arow+1] - ai[arow];
4119     aj     = a->j + ai[arow];
4120     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4121     bnzi += nlnk;
4122     /* add received col data into lnk */
4123     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4124       if (i == *nextrow[k]) { /* i-th row */
4125         anzi = *(nextai[k]+1) - *nextai[k];
4126         aj   = buf_rj[k] + *nextai[k];
4127         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4128         bnzi += nlnk;
4129         nextrow[k]++; nextai[k]++;
4130       }
4131     }
4132     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4133 
4134     /* if free space is not available, make more free space */
4135     if (current_space->local_remaining<bnzi) {
4136       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4137       nspacedouble++;
4138     }
4139     /* copy data into free space, then initialize lnk */
4140     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4141     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4142 
4143     current_space->array           += bnzi;
4144     current_space->local_used      += bnzi;
4145     current_space->local_remaining -= bnzi;
4146 
4147     bi[i+1] = bi[i] + bnzi;
4148   }
4149 
4150   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4151 
4152   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4153   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4154   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4155 
4156   /* create symbolic parallel matrix B_mpi */
4157   /*---------------------------------------*/
4158   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4159   if (n==PETSC_DECIDE) {
4160     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4161   } else {
4162     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4163   }
4164   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4165   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4166   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4167 
4168   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4169   B_mpi->assembled     = PETSC_FALSE;
4170   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4171   merge->bi            = bi;
4172   merge->bj            = bj;
4173   merge->buf_ri        = buf_ri;
4174   merge->buf_rj        = buf_rj;
4175   merge->coi           = PETSC_NULL;
4176   merge->coj           = PETSC_NULL;
4177   merge->owners_co     = PETSC_NULL;
4178 
4179   /* attach the supporting struct to B_mpi for reuse */
4180   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4181   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4182   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4183   *mpimat = B_mpi;
4184 
4185   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4186   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4187   PetscFunctionReturn(0);
4188 }
4189 
4190 #undef __FUNCT__
4191 #define __FUNCT__ "MatMerge_SeqsToMPI"
4192 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4193 {
4194   PetscErrorCode   ierr;
4195 
4196   PetscFunctionBegin;
4197   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4198   if (scall == MAT_INITIAL_MATRIX){
4199     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4200   }
4201   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4202   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4203   PetscFunctionReturn(0);
4204 }
4205 
4206 #undef __FUNCT__
4207 #define __FUNCT__ "MatGetLocalMat"
4208 /*@
4209      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4210 
4211     Not Collective
4212 
4213    Input Parameters:
4214 +    A - the matrix
4215 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4216 
4217    Output Parameter:
4218 .    A_loc - the local sequential matrix generated
4219 
4220     Level: developer
4221 
4222 @*/
4223 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4224 {
4225   PetscErrorCode  ierr;
4226   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4227   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4228   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4229   MatScalar       *aa=a->a,*ba=b->a,*cam;
4230   PetscScalar     *ca;
4231   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4232   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4233 
4234   PetscFunctionBegin;
4235   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4236   if (scall == MAT_INITIAL_MATRIX){
4237     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4238     ci[0] = 0;
4239     for (i=0; i<am; i++){
4240       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4241     }
4242     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4243     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4244     k = 0;
4245     for (i=0; i<am; i++) {
4246       ncols_o = bi[i+1] - bi[i];
4247       ncols_d = ai[i+1] - ai[i];
4248       /* off-diagonal portion of A */
4249       for (jo=0; jo<ncols_o; jo++) {
4250         col = cmap[*bj];
4251         if (col >= cstart) break;
4252         cj[k]   = col; bj++;
4253         ca[k++] = *ba++;
4254       }
4255       /* diagonal portion of A */
4256       for (j=0; j<ncols_d; j++) {
4257         cj[k]   = cstart + *aj++;
4258         ca[k++] = *aa++;
4259       }
4260       /* off-diagonal portion of A */
4261       for (j=jo; j<ncols_o; j++) {
4262         cj[k]   = cmap[*bj++];
4263         ca[k++] = *ba++;
4264       }
4265     }
4266     /* put together the new matrix */
4267     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4268     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4269     /* Since these are PETSc arrays, change flags to free them as necessary. */
4270     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4271     mat->free_a  = PETSC_TRUE;
4272     mat->free_ij = PETSC_TRUE;
4273     mat->nonew   = 0;
4274   } else if (scall == MAT_REUSE_MATRIX){
4275     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4276     ci = mat->i; cj = mat->j; cam = mat->a;
4277     for (i=0; i<am; i++) {
4278       /* off-diagonal portion of A */
4279       ncols_o = bi[i+1] - bi[i];
4280       for (jo=0; jo<ncols_o; jo++) {
4281         col = cmap[*bj];
4282         if (col >= cstart) break;
4283         *cam++ = *ba++; bj++;
4284       }
4285       /* diagonal portion of A */
4286       ncols_d = ai[i+1] - ai[i];
4287       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4288       /* off-diagonal portion of A */
4289       for (j=jo; j<ncols_o; j++) {
4290         *cam++ = *ba++; bj++;
4291       }
4292     }
4293   } else {
4294     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4295   }
4296 
4297   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4298   PetscFunctionReturn(0);
4299 }
4300 
4301 #undef __FUNCT__
4302 #define __FUNCT__ "MatGetLocalMatCondensed"
4303 /*@C
4304      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4305 
4306     Not Collective
4307 
4308    Input Parameters:
4309 +    A - the matrix
4310 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4311 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4312 
4313    Output Parameter:
4314 .    A_loc - the local sequential matrix generated
4315 
4316     Level: developer
4317 
4318 @*/
4319 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4320 {
4321   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4322   PetscErrorCode    ierr;
4323   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4324   IS                isrowa,iscola;
4325   Mat               *aloc;
4326 
4327   PetscFunctionBegin;
4328   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4329   if (!row){
4330     start = A->rmap.rstart; end = A->rmap.rend;
4331     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4332   } else {
4333     isrowa = *row;
4334   }
4335   if (!col){
4336     start = A->cmap.rstart;
4337     cmap  = a->garray;
4338     nzA   = a->A->cmap.n;
4339     nzB   = a->B->cmap.n;
4340     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4341     ncols = 0;
4342     for (i=0; i<nzB; i++) {
4343       if (cmap[i] < start) idx[ncols++] = cmap[i];
4344       else break;
4345     }
4346     imark = i;
4347     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4348     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4349     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4350     ierr = PetscFree(idx);CHKERRQ(ierr);
4351   } else {
4352     iscola = *col;
4353   }
4354   if (scall != MAT_INITIAL_MATRIX){
4355     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4356     aloc[0] = *A_loc;
4357   }
4358   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4359   *A_loc = aloc[0];
4360   ierr = PetscFree(aloc);CHKERRQ(ierr);
4361   if (!row){
4362     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4363   }
4364   if (!col){
4365     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4366   }
4367   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4368   PetscFunctionReturn(0);
4369 }
4370 
4371 #undef __FUNCT__
4372 #define __FUNCT__ "MatGetBrowsOfAcols"
4373 /*@C
4374     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4375 
4376     Collective on Mat
4377 
4378    Input Parameters:
4379 +    A,B - the matrices in mpiaij format
4380 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4381 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4382 
4383    Output Parameter:
4384 +    rowb, colb - index sets of rows and columns of B to extract
4385 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4386 -    B_seq - the sequential matrix generated
4387 
4388     Level: developer
4389 
4390 @*/
4391 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4392 {
4393   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4394   PetscErrorCode    ierr;
4395   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4396   IS                isrowb,iscolb;
4397   Mat               *bseq;
4398 
4399   PetscFunctionBegin;
4400   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4401     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4402   }
4403   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4404 
4405   if (scall == MAT_INITIAL_MATRIX){
4406     start = A->cmap.rstart;
4407     cmap  = a->garray;
4408     nzA   = a->A->cmap.n;
4409     nzB   = a->B->cmap.n;
4410     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4411     ncols = 0;
4412     for (i=0; i<nzB; i++) {  /* row < local row index */
4413       if (cmap[i] < start) idx[ncols++] = cmap[i];
4414       else break;
4415     }
4416     imark = i;
4417     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4418     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4419     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4420     ierr = PetscFree(idx);CHKERRQ(ierr);
4421     *brstart = imark;
4422     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4423   } else {
4424     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4425     isrowb = *rowb; iscolb = *colb;
4426     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4427     bseq[0] = *B_seq;
4428   }
4429   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4430   *B_seq = bseq[0];
4431   ierr = PetscFree(bseq);CHKERRQ(ierr);
4432   if (!rowb){
4433     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4434   } else {
4435     *rowb = isrowb;
4436   }
4437   if (!colb){
4438     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4439   } else {
4440     *colb = iscolb;
4441   }
4442   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4443   PetscFunctionReturn(0);
4444 }
4445 
4446 #undef __FUNCT__
4447 #define __FUNCT__ "MatGetBrowsOfAoCols"
4448 /*@C
4449     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4450     of the OFF-DIAGONAL portion of local A
4451 
4452     Collective on Mat
4453 
4454    Input Parameters:
4455 +    A,B - the matrices in mpiaij format
4456 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4457 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4458 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4459 
4460    Output Parameter:
4461 +    B_oth - the sequential matrix generated
4462 
4463     Level: developer
4464 
4465 @*/
4466 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4467 {
4468   VecScatter_MPI_General *gen_to,*gen_from;
4469   PetscErrorCode         ierr;
4470   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4471   Mat_SeqAIJ             *b_oth;
4472   VecScatter             ctx=a->Mvctx;
4473   MPI_Comm               comm=((PetscObject)ctx)->comm;
4474   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4475   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4476   PetscScalar            *rvalues,*svalues;
4477   MatScalar              *b_otha,*bufa,*bufA;
4478   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4479   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4480   MPI_Status             *sstatus,rstatus;
4481   PetscMPIInt            jj;
4482   PetscInt               *cols,sbs,rbs;
4483   PetscScalar            *vals;
4484 
4485   PetscFunctionBegin;
4486   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4487     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4488   }
4489   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4490   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4491 
4492   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4493   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4494   rvalues  = gen_from->values; /* holds the length of receiving row */
4495   svalues  = gen_to->values;   /* holds the length of sending row */
4496   nrecvs   = gen_from->n;
4497   nsends   = gen_to->n;
4498 
4499   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4500   srow     = gen_to->indices;   /* local row index to be sent */
4501   sstarts  = gen_to->starts;
4502   sprocs   = gen_to->procs;
4503   sstatus  = gen_to->sstatus;
4504   sbs      = gen_to->bs;
4505   rstarts  = gen_from->starts;
4506   rprocs   = gen_from->procs;
4507   rbs      = gen_from->bs;
4508 
4509   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4510   if (scall == MAT_INITIAL_MATRIX){
4511     /* i-array */
4512     /*---------*/
4513     /*  post receives */
4514     for (i=0; i<nrecvs; i++){
4515       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4516       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4517       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4518     }
4519 
4520     /* pack the outgoing message */
4521     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4522     rstartsj = sstartsj + nsends +1;
4523     sstartsj[0] = 0;  rstartsj[0] = 0;
4524     len = 0; /* total length of j or a array to be sent */
4525     k = 0;
4526     for (i=0; i<nsends; i++){
4527       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4528       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4529       for (j=0; j<nrows; j++) {
4530         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4531         for (l=0; l<sbs; l++){
4532           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4533           rowlen[j*sbs+l] = ncols;
4534           len += ncols;
4535           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4536         }
4537         k++;
4538       }
4539       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4540       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4541     }
4542     /* recvs and sends of i-array are completed */
4543     i = nrecvs;
4544     while (i--) {
4545       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4546     }
4547     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4548 
4549     /* allocate buffers for sending j and a arrays */
4550     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4551     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4552 
4553     /* create i-array of B_oth */
4554     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4555     b_othi[0] = 0;
4556     len = 0; /* total length of j or a array to be received */
4557     k = 0;
4558     for (i=0; i<nrecvs; i++){
4559       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4560       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4561       for (j=0; j<nrows; j++) {
4562         b_othi[k+1] = b_othi[k] + rowlen[j];
4563         len += rowlen[j]; k++;
4564       }
4565       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4566     }
4567 
4568     /* allocate space for j and a arrrays of B_oth */
4569     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4570     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4571 
4572     /* j-array */
4573     /*---------*/
4574     /*  post receives of j-array */
4575     for (i=0; i<nrecvs; i++){
4576       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4577       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4578     }
4579 
4580     /* pack the outgoing message j-array */
4581     k = 0;
4582     for (i=0; i<nsends; i++){
4583       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4584       bufJ = bufj+sstartsj[i];
4585       for (j=0; j<nrows; j++) {
4586         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4587         for (ll=0; ll<sbs; ll++){
4588           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4589           for (l=0; l<ncols; l++){
4590             *bufJ++ = cols[l];
4591           }
4592           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4593         }
4594       }
4595       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4596     }
4597 
4598     /* recvs and sends of j-array are completed */
4599     i = nrecvs;
4600     while (i--) {
4601       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4602     }
4603     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4604   } else if (scall == MAT_REUSE_MATRIX){
4605     sstartsj = *startsj;
4606     rstartsj = sstartsj + nsends +1;
4607     bufa     = *bufa_ptr;
4608     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4609     b_otha   = b_oth->a;
4610   } else {
4611     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4612   }
4613 
4614   /* a-array */
4615   /*---------*/
4616   /*  post receives of a-array */
4617   for (i=0; i<nrecvs; i++){
4618     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4619     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4620   }
4621 
4622   /* pack the outgoing message a-array */
4623   k = 0;
4624   for (i=0; i<nsends; i++){
4625     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4626     bufA = bufa+sstartsj[i];
4627     for (j=0; j<nrows; j++) {
4628       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4629       for (ll=0; ll<sbs; ll++){
4630         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4631         for (l=0; l<ncols; l++){
4632           *bufA++ = vals[l];
4633         }
4634         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4635       }
4636     }
4637     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4638   }
4639   /* recvs and sends of a-array are completed */
4640   i = nrecvs;
4641   while (i--) {
4642     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4643   }
4644   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4645   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4646 
4647   if (scall == MAT_INITIAL_MATRIX){
4648     /* put together the new matrix */
4649     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4650 
4651     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4652     /* Since these are PETSc arrays, change flags to free them as necessary. */
4653     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4654     b_oth->free_a  = PETSC_TRUE;
4655     b_oth->free_ij = PETSC_TRUE;
4656     b_oth->nonew   = 0;
4657 
4658     ierr = PetscFree(bufj);CHKERRQ(ierr);
4659     if (!startsj || !bufa_ptr){
4660       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4661       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4662     } else {
4663       *startsj  = sstartsj;
4664       *bufa_ptr = bufa;
4665     }
4666   }
4667   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4668   PetscFunctionReturn(0);
4669 }
4670 
4671 #undef __FUNCT__
4672 #define __FUNCT__ "MatGetCommunicationStructs"
4673 /*@C
4674   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4675 
4676   Not Collective
4677 
4678   Input Parameters:
4679 . A - The matrix in mpiaij format
4680 
4681   Output Parameter:
4682 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4683 . colmap - A map from global column index to local index into lvec
4684 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4685 
4686   Level: developer
4687 
4688 @*/
4689 #if defined (PETSC_USE_CTABLE)
4690 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4691 #else
4692 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4693 #endif
4694 {
4695   Mat_MPIAIJ *a;
4696 
4697   PetscFunctionBegin;
4698   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4699   PetscValidPointer(lvec, 2)
4700   PetscValidPointer(colmap, 3)
4701   PetscValidPointer(multScatter, 4)
4702   a = (Mat_MPIAIJ *) A->data;
4703   if (lvec) *lvec = a->lvec;
4704   if (colmap) *colmap = a->colmap;
4705   if (multScatter) *multScatter = a->Mvctx;
4706   PetscFunctionReturn(0);
4707 }
4708 
4709 EXTERN_C_BEGIN
4710 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4711 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4712 EXTERN_C_END
4713 
4714 #include "src/mat/impls/dense/mpi/mpidense.h"
4715 
4716 #undef __FUNCT__
4717 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4718 /*
4719     Computes (B'*A')' since computing B*A directly is untenable
4720 
4721                n                       p                          p
4722         (              )       (              )         (                  )
4723       m (      A       )  *  n (       B      )   =   m (         C        )
4724         (              )       (              )         (                  )
4725 
4726 */
4727 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4728 {
4729   PetscErrorCode     ierr;
4730   Mat                At,Bt,Ct;
4731 
4732   PetscFunctionBegin;
4733   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4734   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4735   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4736   ierr = MatDestroy(At);CHKERRQ(ierr);
4737   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4738   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4739   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4740   PetscFunctionReturn(0);
4741 }
4742 
4743 #undef __FUNCT__
4744 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4745 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4746 {
4747   PetscErrorCode ierr;
4748   PetscInt       m=A->rmap.n,n=B->cmap.n;
4749   Mat            Cmat;
4750 
4751   PetscFunctionBegin;
4752   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
4753   ierr = MatCreate(A->hdr.comm,&Cmat);CHKERRQ(ierr);
4754   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4755   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4756   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4757   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4758   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4759   *C   = Cmat;
4760   PetscFunctionReturn(0);
4761 }
4762 
4763 /* ----------------------------------------------------------------*/
4764 #undef __FUNCT__
4765 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4766 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4767 {
4768   PetscErrorCode ierr;
4769 
4770   PetscFunctionBegin;
4771   if (scall == MAT_INITIAL_MATRIX){
4772     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4773   }
4774   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4775   PetscFunctionReturn(0);
4776 }
4777 
4778 /*MC
4779    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4780 
4781    Options Database Keys:
4782 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4783 
4784   Level: beginner
4785 
4786 .seealso: MatCreateMPIAIJ()
4787 M*/
4788 
4789 EXTERN_C_BEGIN
4790 #undef __FUNCT__
4791 #define __FUNCT__ "MatCreate_MPIAIJ"
4792 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4793 {
4794   Mat_MPIAIJ     *b;
4795   PetscErrorCode ierr;
4796   PetscMPIInt    size;
4797 
4798   PetscFunctionBegin;
4799   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4800 
4801   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4802   B->data         = (void*)b;
4803   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4804   B->factor       = 0;
4805   B->rmap.bs      = 1;
4806   B->assembled    = PETSC_FALSE;
4807   B->mapping      = 0;
4808 
4809   B->insertmode      = NOT_SET_VALUES;
4810   b->size            = size;
4811   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4812 
4813   /* build cache for off array entries formed */
4814   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4815   b->donotstash  = PETSC_FALSE;
4816   b->colmap      = 0;
4817   b->garray      = 0;
4818   b->roworiented = PETSC_TRUE;
4819 
4820   /* stuff used for matrix vector multiply */
4821   b->lvec      = PETSC_NULL;
4822   b->Mvctx     = PETSC_NULL;
4823 
4824   /* stuff for MatGetRow() */
4825   b->rowindices   = 0;
4826   b->rowvalues    = 0;
4827   b->getrowactive = PETSC_FALSE;
4828 
4829 
4830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4831                                      "MatStoreValues_MPIAIJ",
4832                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4834                                      "MatRetrieveValues_MPIAIJ",
4835                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4837 				     "MatGetDiagonalBlock_MPIAIJ",
4838                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4839   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4840 				     "MatIsTranspose_MPIAIJ",
4841 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4843 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4844 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4846 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4847 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4849 				     "MatDiagonalScaleLocal_MPIAIJ",
4850 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4852                                      "MatConvert_MPIAIJ_MPICSRPERM",
4853                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4855                                      "MatConvert_MPIAIJ_MPICRL",
4856                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
4858                                      "MatMatMult_MPIDense_MPIAIJ",
4859                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
4861                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
4862                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4863   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
4864                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
4865                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4866   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4867   PetscFunctionReturn(0);
4868 }
4869 EXTERN_C_END
4870 
4871 #undef __FUNCT__
4872 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4873 /*@
4874      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4875          and "off-diagonal" part of the matrix in CSR format.
4876 
4877    Collective on MPI_Comm
4878 
4879    Input Parameters:
4880 +  comm - MPI communicator
4881 .  m - number of local rows (Cannot be PETSC_DECIDE)
4882 .  n - This value should be the same as the local size used in creating the
4883        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4884        calculated if N is given) For square matrices n is almost always m.
4885 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4886 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4887 .   i - row indices for "diagonal" portion of matrix
4888 .   j - column indices
4889 .   a - matrix values
4890 .   oi - row indices for "off-diagonal" portion of matrix
4891 .   oj - column indices
4892 -   oa - matrix values
4893 
4894    Output Parameter:
4895 .   mat - the matrix
4896 
4897    Level: advanced
4898 
4899    Notes:
4900        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4901 
4902        The i and j indices are 0 based
4903 
4904        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4905 
4906 
4907 .keywords: matrix, aij, compressed row, sparse, parallel
4908 
4909 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4910           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4911 @*/
4912 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4913 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4914 {
4915   PetscErrorCode ierr;
4916   Mat_MPIAIJ     *maij;
4917 
4918  PetscFunctionBegin;
4919   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4920   if (i[0]) {
4921     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4922   }
4923   if (oi[0]) {
4924     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4925   }
4926   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4927   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4928   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4929   maij = (Mat_MPIAIJ*) (*mat)->data;
4930   maij->donotstash     = PETSC_TRUE;
4931   (*mat)->preallocated = PETSC_TRUE;
4932 
4933   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4934   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4935   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4936 
4937   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4938   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4939 
4940   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4941   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4942   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4943   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4944 
4945   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4946   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4947   PetscFunctionReturn(0);
4948 }
4949 
4950 /*
4951     Special version for direct calls from Fortran
4952 */
4953 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4954 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4955 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4956 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4957 #endif
4958 
4959 /* Change these macros so can be used in void function */
4960 #undef CHKERRQ
4961 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4962 #undef SETERRQ2
4963 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4964 #undef SETERRQ
4965 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4966 
4967 EXTERN_C_BEGIN
4968 #undef __FUNCT__
4969 #define __FUNCT__ "matsetvaluesmpiaij_"
4970 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4971 {
4972   Mat             mat = *mmat;
4973   PetscInt        m = *mm, n = *mn;
4974   InsertMode      addv = *maddv;
4975   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
4976   PetscScalar     value;
4977   PetscErrorCode  ierr;
4978 
4979   MatPreallocated(mat);
4980   if (mat->insertmode == NOT_SET_VALUES) {
4981     mat->insertmode = addv;
4982   }
4983 #if defined(PETSC_USE_DEBUG)
4984   else if (mat->insertmode != addv) {
4985     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4986   }
4987 #endif
4988   {
4989   PetscInt        i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4990   PetscInt        cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4991   PetscTruth      roworiented = aij->roworiented;
4992 
4993   /* Some Variables required in the macro */
4994   Mat             A = aij->A;
4995   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
4996   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4997   MatScalar       *aa = a->a;
4998   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4999   Mat             B = aij->B;
5000   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5001   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
5002   MatScalar       *ba = b->a;
5003 
5004   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5005   PetscInt        nonew = a->nonew;
5006   MatScalar       *ap1,*ap2;
5007 
5008   PetscFunctionBegin;
5009   for (i=0; i<m; i++) {
5010     if (im[i] < 0) continue;
5011 #if defined(PETSC_USE_DEBUG)
5012     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
5013 #endif
5014     if (im[i] >= rstart && im[i] < rend) {
5015       row      = im[i] - rstart;
5016       lastcol1 = -1;
5017       rp1      = aj + ai[row];
5018       ap1      = aa + ai[row];
5019       rmax1    = aimax[row];
5020       nrow1    = ailen[row];
5021       low1     = 0;
5022       high1    = nrow1;
5023       lastcol2 = -1;
5024       rp2      = bj + bi[row];
5025       ap2      = ba + bi[row];
5026       rmax2    = bimax[row];
5027       nrow2    = bilen[row];
5028       low2     = 0;
5029       high2    = nrow2;
5030 
5031       for (j=0; j<n; j++) {
5032         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5033         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5034         if (in[j] >= cstart && in[j] < cend){
5035           col = in[j] - cstart;
5036           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5037         } else if (in[j] < 0) continue;
5038 #if defined(PETSC_USE_DEBUG)
5039         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
5040 #endif
5041         else {
5042           if (mat->was_assembled) {
5043             if (!aij->colmap) {
5044               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5045             }
5046 #if defined (PETSC_USE_CTABLE)
5047             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5048 	    col--;
5049 #else
5050             col = aij->colmap[in[j]] - 1;
5051 #endif
5052             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5053               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5054               col =  in[j];
5055               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5056               B = aij->B;
5057               b = (Mat_SeqAIJ*)B->data;
5058               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5059               rp2      = bj + bi[row];
5060               ap2      = ba + bi[row];
5061               rmax2    = bimax[row];
5062               nrow2    = bilen[row];
5063               low2     = 0;
5064               high2    = nrow2;
5065               bm       = aij->B->rmap.n;
5066               ba = b->a;
5067             }
5068           } else col = in[j];
5069           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5070         }
5071       }
5072     } else {
5073       if (!aij->donotstash) {
5074         if (roworiented) {
5075           if (ignorezeroentries && v[i*n] == 0.0) continue;
5076           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5077         } else {
5078           if (ignorezeroentries && v[i] == 0.0) continue;
5079           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5080         }
5081       }
5082     }
5083   }}
5084   PetscFunctionReturnVoid();
5085 }
5086 EXTERN_C_END
5087 
5088