xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 18404f68340fb1dd867b2411df75ee50e39fea2d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
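/*
   A minimal usage sketch (n, d_nz, and o_nz are placeholder sizes): create an
   AIJ matrix and call both preallocation routines as recommended above.  The
   routine that does not match the communicator is a no-op, so both may always
   be called safely.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/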
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
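/*
   A minimal sketch of selecting this format from the options database
   (assumes the program is run with -mat_type aijcrl):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/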
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na; j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
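/*
   Usage sketch for the routine above through the generic interface (assuming
   it is dispatched via MatFindNonzeroRows()); keptrows comes back NULL when
   every row on every process already contains a nonzero.

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/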
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
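/*
   Usage sketch for the routine above via MatGetColumnNorms(); norms must hold
   one entry per global column, and the result is identical on every process
   because of the Allreduce above.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/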
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send each of the other processes its row lengths */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the numbers of diagonal and off-diagonal nonzeros */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the numbers of diagonal and off-diagonal nonzeros */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0 */
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
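/*
   Usage sketch for the routine above (hypothetical driver; square matrices
   only, as noted).  Process 0 supplies the entries of the SeqAIJ matrix gmat
   and each process receives its m local rows; with MAT_REUSE_MATRIX only the
   numerical values are moved.

     Mat dist = PETSC_NULL;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/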
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it, it is not scalable (each process
331 holds an order-N integer array) but access is fast.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
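/*
   Lookup sketch matching the two storage variants above: translating a global
   column gcol into a local off-diagonal index lcol (negative when absent),
   exactly as done in MatSetValues_MPIAIJ() below.  The +1/-1 shift lets 0
   mean "not present" in both variants.

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/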
356 
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
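/*
   Usage sketch for the routine above (grow and v are hypothetical inputs):
   v must supply exactly the stored nonzeros of one locally owned global row,
   ordered as the three memcpys consume them: off-diagonal entries left of the
   diagonal block, then the diagonal block, then the remaining off-diagonal
   entries.

     ierr = MatSetValuesRow(A,grow,v);CHKERRQ(ierr);
*/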
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
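/*
   Usage sketch of the generic interface that dispatches to the routine above
   (row, col, and val are hypothetical inputs).  Off-process entries, handled
   by the final branch above, are stashed and communicated during assembly, so
   the assembly calls are mandatory.

     PetscInt    row,col;
     PetscScalar val;
     ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/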
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
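/*
   Usage sketch for the routine above: only locally owned rows may be queried
   (see the error at the end of the routine), columns may refer to any part of
   the matrix, and entries that are not stored come back as 0.0.

     PetscInt    row,col;
     PetscScalar val;
     ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
*/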
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any process has disassembled; if so we must
668      also disassemble ourselves so that we may reassemble. */
669   /*
670      if the nonzero structure of submatrix B cannot change then we know that
671      no process disassembled, so we can skip this step
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal allocating
843      new memory, thus calling lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
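/*
   Usage sketch for the routine above via MatZeroRows() (nr and rows are
   hypothetical inputs): rows holds global row numbers and any process may
   list any row.  With a nonzero diag the diagonal entry is kept or inserted,
   and when x and b are supplied the right-hand side is fixed so the zeroed
   unknowns solve to the values in x.

     ierr = MatZeroRows(A,nr,rows,1.0,x,b);CHKERRQ(ierr);
*/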
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
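/*
   Usage sketch for the routine above via MatZeroRowsColumns(): the calling
   sequence matches MatZeroRows(), but the listed columns are zeroed as well,
   and when x and b are supplied the known values are first subtracted from
   the right-hand side (the mask/scatter logic above).

     ierr = MatZeroRowsColumns(A,nr,rows,1.0,x,b);CHKERRQ(ierr);
*/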
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually
1141        added into yy until the next line */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* the values were actually received in the Begin() but we still need to call this no-op */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
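/*
   Usage sketch for the routine above via MatIsTranspose(); f is PETSC_TRUE
   when Bmat equals the transpose of Amat to within tol (tested blockwise as
   above).  Passing the same matrix twice tests symmetry.

     PetscBool f;
     ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);
*/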
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   if (!rank) {
1308     header[0] = MAT_FILE_CLASSID;
1309     header[1] = mat->rmap->N;
1310     header[2] = mat->cmap->N;
1311     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1312     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1313     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1314     /* get largest number of rows any processor has */
1315     rlen = mat->rmap->n;
1316     range = mat->rmap->range;
1317     for (i=1; i<size; i++) {
1318       rlen = PetscMax(rlen,range[i+1] - range[i]);
1319     }
1320   } else {
1321     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1322     rlen = mat->rmap->n;
1323   }
1324 
1325   /* load up the local row counts */
1326   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1327   for (i=0; i<mat->rmap->n; i++) {
1328     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1329   }
1330 
1331   /* store the row lengths to the file */
1332   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1333   if (!rank) {
1334     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     for (i=1; i<size; i++) {
1336       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1337       rlen = range[i+1] - range[i];
1338       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1340     }
1341     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1342   } else {
1343     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1344     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1346   }
1347   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1348 
1349   /* load up the local column indices */
1350   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1351   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1352   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1353   cnt  = 0;
1354   for (i=0; i<mat->rmap->n; i++) {
1355     for (j=B->i[i]; j<B->i[i+1]; j++) {
1356       if ((col = garray[B->j[j]]) > cstart) break;
1357       column_indices[cnt++] = col;
1358     }
1359     for (k=A->i[i]; k<A->i[i+1]; k++) {
1360       column_indices[cnt++] = A->j[k] + cstart;
1361     }
1362     for (; j<B->i[i+1]; j++) {
1363       column_indices[cnt++] = garray[B->j[j]];
1364     }
1365   }
1366   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1367 
1368   /* store the column indices to the file */
1369   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1370   if (!rank) {
1371     MPI_Status status;
1372     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1373     for (i=1; i<size; i++) {
1374       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1375       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1376       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1377       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1383     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1386   }
1387   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1388 
1389   /* load up the local column values */
1390   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1391   cnt  = 0;
1392   for (i=0; i<mat->rmap->n; i++) {
1393     for (j=B->i[i]; j<B->i[i+1]; j++) {
1394       if (garray[B->j[j]] > cstart) break;
1395       column_values[cnt++] = B->a[j];
1396     }
1397     for (k=A->i[i]; k<A->i[i+1]; k++) {
1398       column_values[cnt++] = A->a[k];
1399     }
1400     for (; j<B->i[i+1]; j++) {
1401       column_values[cnt++] = B->a[j];
1402     }
1403   }
1404   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1405 
1406   /* store the column values to the file */
1407   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1408   if (!rank) {
1409     MPI_Status status;
1410     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1411     for (i=1; i<size; i++) {
1412       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1413       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1414       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1415       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1416       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1417     }
1418     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1419   } else {
1420     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1421     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1424   }
1425   ierr = PetscFree(column_values);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
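
/*
   The three master/worker loops above share one flow-control pattern. The
   compiled-out sketch below isolates that skeleton so the protocol is easier
   to follow; the helper name is hypothetical and the payload transfers are
   left as comments.
*/
#if 0
static PetscErrorCode FlowControlledGatherSketch(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    /* process 0 writes its own data first, then drains each worker in rank order */
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      /* ... MPIULong_Recv() worker i's data and PetscBinaryWrite() it ... */
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    /* workers block until the master signals readiness, then send their local data */
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    /* ... MPIULong_Send() local data to process 0 ... */
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif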
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1432 {
1433   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1434   PetscErrorCode    ierr;
1435   PetscMPIInt       rank = aij->rank,size = aij->size;
1436   PetscBool         isdraw,iascii,isbinary;
1437   PetscViewer       sviewer;
1438   PetscViewerFormat format;
1439 
1440   PetscFunctionBegin;
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1443   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1444   if (iascii) {
1445     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1446     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1447       MatInfo    info;
1448       PetscBool  inodes;
1449 
1450       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1451       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1452       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1454       if (!inodes) {
1455         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1456                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1457       } else {
1458         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1459                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1460       }
1461       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1463       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1465       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1468       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1471       PetscInt   inodecount,inodelimit,*inodes;
1472       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1473       if (inodes) {
1474         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1475       } else {
1476         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1477       }
1478       PetscFunctionReturn(0);
1479     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1480       PetscFunctionReturn(0);
1481     }
1482   } else if (isbinary) {
1483     if (size == 1) {
1484       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1485       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1486     } else {
1487       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1488     }
1489     PetscFunctionReturn(0);
1490   } else if (isdraw) {
1491     PetscDraw  draw;
1492     PetscBool  isnull;
1493     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1494     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1495   }
1496 
1497   if (size == 1) {
1498     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1499     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1500   } else {
1501     /* assemble the entire matrix onto first processor. */
1502     Mat         A;
1503     Mat_SeqAIJ  *Aloc;
1504     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505     MatScalar   *a;
1506 
1507     if (mat->rmap->N > 1024) {
1508       PetscBool  flg = PETSC_FALSE;
1509 
1510       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1511       if (!flg) {
1512         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513       }
1514     }
1515 
1516     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1517     if (!rank) {
1518       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1519     } else {
1520       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1521     }
1522     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1524     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1525     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1526     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1527 
1528     /* copy over the A part */
1529     Aloc = (Mat_SeqAIJ*)aij->A->data;
1530     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531     row = mat->rmap->rstart;
1532     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1533     for (i=0; i<m; i++) {
1534       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1535       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536     }
1537     aj = Aloc->j;
1538     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1539 
1540     /* copy over the B part */
1541     Aloc = (Mat_SeqAIJ*)aij->B->data;
1542     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543     row  = mat->rmap->rstart;
1544     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1545     ct   = cols;
1546     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547     for (i=0; i<m; i++) {
1548       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1549       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550     }
1551     ierr = PetscFree(ct);CHKERRQ(ierr);
1552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554     /*
1555        Everyone has to call to draw the matrix since the graphics waits are
1556        synchronized across all processors that share the PetscDraw object
1557     */
1558     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559     if (!rank) {
1560       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1561       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1563       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1564     }
1565     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     ierr = MatDestroy(&A);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatView_MPIAIJ"
1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574 {
1575   PetscErrorCode ierr;
1576   PetscBool      iascii,isdraw,issocket,isbinary;
1577 
1578   PetscFunctionBegin;
1579   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1580   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1581   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1582   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1583   if (iascii || isdraw || isbinary || issocket) {
1584     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1585   } else {
1586     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1587   }
1588   PetscFunctionReturn(0);
1589 }
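
/*
   Usage sketch (compiled out): saving an assembled MPIAIJ matrix through a
   binary viewer, which dispatches to MatView_MPIAIJ_Binary() above. The
   helper name and file name are hypothetical.
*/
#if 0
static PetscErrorCode SaveMatrixBinarySketch(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif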
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSOR_MPIAIJ"
1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1594 {
1595   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1596   PetscErrorCode ierr;
1597   Vec            bb1 = 0;
1598   PetscBool      hasop;
1599 
1600   PetscFunctionBegin;
1601   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1602     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1603   }
1604 
1605   if (flag == SOR_APPLY_UPPER) {
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607     PetscFunctionReturn(0);
1608   }
1609 
1610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1611     if (flag & SOR_ZERO_INITIAL_GUESS) {
1612       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1613       its--;
1614     }
1615 
1616     while (its--) {
1617       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1619 
1620       /* update rhs: bb1 = bb - B*x */
1621       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1622       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1623 
1624       /* local sweep */
1625       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1626     }
1627   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1628     if (flag & SOR_ZERO_INITIAL_GUESS) {
1629       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1630       its--;
1631     }
1632     while (its--) {
1633       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1635 
1636       /* update rhs: bb1 = bb - B*x */
1637       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1638       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1639 
1640       /* local sweep */
1641       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1642     }
1643   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1644     if (flag & SOR_ZERO_INITIAL_GUESS) {
1645       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1646       its--;
1647     }
1648     while (its--) {
1649       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1651 
1652       /* update rhs: bb1 = bb - B*x */
1653       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1654       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1655 
1656       /* local sweep */
1657       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1658     }
1659   }  else if (flag & SOR_EISENSTAT) {
1660     Vec         xx1;
1661 
1662     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1663     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1664 
1665     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667     if (!mat->diag) {
1668       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1669       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1670     }
1671     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1672     if (hasop) {
1673       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1674     } else {
1675       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1676     }
1677     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1678 
1679     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1680 
1681     /* local sweep */
1682     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1683     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1684     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1685   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1686 
1687   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
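
/*
   Usage sketch (compiled out): applying the local symmetric sweeps handled
   above as a stand-alone smoother. The helper name is hypothetical, and
   omega, fshift, and the sweep counts are illustrative values only.
*/
#if 0
static PetscErrorCode LocalSSORSketch(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* two outer iterations, one local sweep each, zero initial guess */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif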
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatPermute_MPIAIJ"
1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1694 {
1695   MPI_Comm       comm;
1696   PetscInt       first,local_rowsize,local_colsize;
1697   const PetscInt *rows;
1698   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1703   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1704   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1705   /* collect the global row permutation and invert it */
1706   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1707   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1708   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1710   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718   /* the column permutation is so much easier;
1719      make a local version of 'colp' and invert it */
1720   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1721   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1722   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1724   /* now we just get the submatrix */
1725   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1726   /* clean up */
1727   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1728   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
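
/*
   Usage sketch (compiled out): driving MatPermute() with index sets. A
   stride IS giving the identity permutation is used only because it is the
   simplest valid input; the routine above gathers and inverts the row
   permutation and localizes the column permutation itself. Helper name is
   hypothetical.
*/
#if 0
static PetscErrorCode PermuteSketch(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif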
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735 {
1736   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1737   Mat            A = mat->A,B = mat->B;
1738   PetscErrorCode ierr;
1739   PetscReal      isend[5],irecv[5];
1740 
1741   PetscFunctionBegin;
1742   info->block_size     = 1.0;
1743   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1744   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1745   isend[3] = info->memory;  isend[4] = info->mallocs;
1746   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1747   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1748   isend[3] += info->memory;  isend[4] += info->mallocs;
1749   if (flag == MAT_LOCAL) {
1750     info->nz_used      = isend[0];
1751     info->nz_allocated = isend[1];
1752     info->nz_unneeded  = isend[2];
1753     info->memory       = isend[3];
1754     info->mallocs      = isend[4];
1755   } else if (flag == MAT_GLOBAL_MAX) {
1756     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1757     info->nz_used      = irecv[0];
1758     info->nz_allocated = irecv[1];
1759     info->nz_unneeded  = irecv[2];
1760     info->memory       = irecv[3];
1761     info->mallocs      = irecv[4];
1762   } else if (flag == MAT_GLOBAL_SUM) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   }
1770   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1771   info->fill_ratio_needed = 0;
1772   info->factor_mallocs    = 0;
1773 
1774   PetscFunctionReturn(0);
1775 }
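
/*
   Usage sketch (compiled out): querying the globally reduced nonzero counts
   computed above. Helper name is hypothetical; the casts mirror the viewer
   code earlier in this file, since MatInfo fields are stored as doubles.
*/
#if 0
static PetscErrorCode ReportNonzerosSketch(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %D of %D allocated\n",(PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif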
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatSetOption_MPIAIJ"
1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1780 {
1781   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1782   PetscErrorCode ierr;
1783 
1784   PetscFunctionBegin;
1785   switch (op) {
1786   case MAT_NEW_NONZERO_LOCATIONS:
1787   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1788   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1789   case MAT_KEEP_NONZERO_PATTERN:
1790   case MAT_NEW_NONZERO_LOCATION_ERR:
1791   case MAT_USE_INODES:
1792   case MAT_IGNORE_ZERO_ENTRIES:
1793     MatCheckPreallocated(A,1);
1794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1795     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1796     break;
1797   case MAT_ROW_ORIENTED:
1798     a->roworiented = flg;
1799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1800     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1801     break;
1802   case MAT_NEW_DIAGONALS:
1803     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1804     break;
1805   case MAT_IGNORE_OFF_PROC_ENTRIES:
1806     a->donotstash = flg;
1807     break;
1808   case MAT_SPD:
1809     A->spd_set                         = PETSC_TRUE;
1810     A->spd                             = flg;
1811     if (flg) {
1812       A->symmetric                     = PETSC_TRUE;
1813       A->structurally_symmetric        = PETSC_TRUE;
1814       A->symmetric_set                 = PETSC_TRUE;
1815       A->structurally_symmetric_set    = PETSC_TRUE;
1816     }
1817     break;
1818   case MAT_SYMMETRIC:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_STRUCTURALLY_SYMMETRIC:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   case MAT_HERMITIAN:
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_SYMMETRY_ETERNAL:
1828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1829     break;
1830   default:
1831     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1832   }
1833   PetscFunctionReturn(0);
1834 }
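
/*
   Usage sketch (compiled out): typical option settings that reach the switch
   above. MAT_IGNORE_OFF_PROC_ENTRIES only sets a->donotstash, so it must be
   set before any MatSetValues() calls that touch off-process rows. Helper
   name is hypothetical.
*/
#if 0
static PetscErrorCode SetOptionsSketch(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); /* also marks the matrix (structurally) symmetric */
  PetscFunctionReturn(0);
}
#endif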
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatGetRow_MPIAIJ"
1838 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1839 {
1840   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1841   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1842   PetscErrorCode ierr;
1843   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1844   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1845   PetscInt       *cmap,*idx_p;
1846 
1847   PetscFunctionBegin;
1848   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1849   mat->getrowactive = PETSC_TRUE;
1850 
1851   if (!mat->rowvalues && (idx || v)) {
1852     /*
1853         allocate enough space to hold information from the longest row.
1854     */
1855     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1856     PetscInt   max = 1,tmp;
1857     for (i=0; i<matin->rmap->n; i++) {
1858       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1859       if (max < tmp) { max = tmp; }
1860     }
1861     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1862   }
1863 
1864   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1865   lrow = row - rstart;
1866 
1867   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1868   if (!v)   {pvA = 0; pvB = 0;}
1869   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1870   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1871   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1872   nztot = nzA + nzB;
1873 
1874   cmap  = mat->garray;
1875   if (v  || idx) {
1876     if (nztot) {
1877       /* Sort by increasing column numbers, assuming A and B already sorted */
1878       PetscInt imark = -1;
1879       if (v) {
1880         *v = v_p = mat->rowvalues;
1881         for (i=0; i<nzB; i++) {
1882           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1883           else break;
1884         }
1885         imark = i;
1886         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1887         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1888       }
1889       if (idx) {
1890         *idx = idx_p = mat->rowindices;
1891         if (imark > -1) {
1892           for (i=0; i<imark; i++) {
1893             idx_p[i] = cmap[cworkB[i]];
1894           }
1895         } else {
1896           for (i=0; i<nzB; i++) {
1897             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1898             else break;
1899           }
1900           imark = i;
1901         }
1902         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1903         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1904       }
1905     } else {
1906       if (idx) *idx = 0;
1907       if (v)   *v   = 0;
1908     }
1909   }
1910   *nz = nztot;
1911   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1912   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1918 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1919 {
1920   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1921 
1922   PetscFunctionBegin;
1923   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1924   aij->getrowactive = PETSC_FALSE;
1925   PetscFunctionReturn(0);
1926 }
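
/*
   Usage sketch (compiled out) of the MatGetRow()/MatRestoreRow() protocol
   the two routines above implement: only locally owned rows may be
   requested, and each MatGetRow() must be paired with a MatRestoreRow()
   before the next row is fetched. Helper name is hypothetical; the row sums
   are simply discarded here.
*/
#if 0
static PetscErrorCode RowSumsSketch(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       sum;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    sum  = 0.0;
    for (j=0; j<ncols; j++) sum += vals[j];
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif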
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatNorm_MPIAIJ"
1930 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1931 {
1932   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1933   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1934   PetscErrorCode ierr;
1935   PetscInt       i,j,cstart = mat->cmap->rstart;
1936   PetscReal      sum = 0.0;
1937   MatScalar      *v;
1938 
1939   PetscFunctionBegin;
1940   if (aij->size == 1) {
1941     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1942   } else {
1943     if (type == NORM_FROBENIUS) {
1944       v = amat->a;
1945       for (i=0; i<amat->nz; i++) {
1946 #if defined(PETSC_USE_COMPLEX)
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948 #else
1949         sum += (*v)*(*v); v++;
1950 #endif
1951       }
1952       v = bmat->a;
1953       for (i=0; i<bmat->nz; i++) {
1954 #if defined(PETSC_USE_COMPLEX)
1955         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1956 #else
1957         sum += (*v)*(*v); v++;
1958 #endif
1959       }
1960       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1961       *norm = PetscSqrtReal(*norm);
1962     } else if (type == NORM_1) { /* max column norm */
1963       PetscReal *tmp,*tmp2;
1964       PetscInt  *jj,*garray = aij->garray;
1965       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1966       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1967       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1968       *norm = 0.0;
1969       v = amat->a; jj = amat->j;
1970       for (j=0; j<amat->nz; j++) {
1971         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1972       }
1973       v = bmat->a; jj = bmat->j;
1974       for (j=0; j<bmat->nz; j++) {
1975         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1976       }
1977       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1978       for (j=0; j<mat->cmap->N; j++) {
1979         if (tmp2[j] > *norm) *norm = tmp2[j];
1980       }
1981       ierr = PetscFree(tmp);CHKERRQ(ierr);
1982       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1983     } else if (type == NORM_INFINITY) { /* max row norm */
1984       PetscReal ntemp = 0.0;
1985       for (j=0; j<aij->A->rmap->n; j++) {
1986         v = amat->a + amat->i[j];
1987         sum = 0.0;
1988         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1989           sum += PetscAbsScalar(*v); v++;
1990         }
1991         v = bmat->a + bmat->i[j];
1992         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1993           sum += PetscAbsScalar(*v); v++;
1994         }
1995         if (sum > ntemp) ntemp = sum;
1996       }
1997       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1998     } else {
1999       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2000     }
2001   }
2002   PetscFunctionReturn(0);
2003 }
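
/*
   Usage sketch (compiled out): the three norms supported above; requesting
   NORM_2 raises PETSC_ERR_SUP. Helper name is hypothetical.
*/
#if 0
static PetscErrorCode NormsSketch(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fnorm,onenorm,infnorm;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fnorm);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&onenorm);CHKERRQ(ierr);         /* max column sum */
  ierr = MatNorm(A,NORM_INFINITY,&infnorm);CHKERRQ(ierr);  /* max row sum */
  PetscFunctionReturn(0);
}
#endif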
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatTranspose_MPIAIJ"
2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2008 {
2009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2010   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2011   PetscErrorCode ierr;
2012   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2013   PetscInt       cstart=A->cmap->rstart,ncol;
2014   Mat            B;
2015   MatScalar      *array;
2016 
2017   PetscFunctionBegin;
2018   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2019 
2020   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2021   ai = Aloc->i; aj = Aloc->j;
2022   bi = Bloc->i; bj = Bloc->j;
2023   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2024     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2025     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2026     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2027     for (i=0; i<ai[ma]; i++){
2028       d_nnz[aj[i]]++;
2029       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2030     }
2031 
2032     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2033     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2034     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2035     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2036     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2037     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2038     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2039   } else {
2040     B = *matout;
2041   }
2042 
2043   /* copy over the A part */
2044   array = Aloc->a;
2045   row = A->rmap->rstart;
2046   for (i=0; i<ma; i++) {
2047     ncol = ai[i+1]-ai[i];
2048     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2049     row++; array += ncol; aj += ncol;
2050   }
2051   aj = Aloc->j;
2052   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2053 
2054   /* copy over the B part */
2055   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2056   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2057   array = Bloc->a;
2058   row = A->rmap->rstart;
2059   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2060   cols_tmp = cols;
2061   for (i=0; i<mb; i++) {
2062     ncol = bi[i+1]-bi[i];
2063     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2064     row++; array += ncol; cols_tmp += ncol;
2065   }
2066   ierr = PetscFree(cols);CHKERRQ(ierr);
2067 
2068   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2071     *matout = B;
2072   } else {
2073     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2074   }
2075   PetscFunctionReturn(0);
2076 }
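
/*
   Usage sketch (compiled out): the out-of-place and in-place transposes
   handled above; as the check at the top of the routine enforces, in-place
   reuse is only allowed for square matrices. Helper name is hypothetical.
*/
#if 0
static PetscErrorCode TransposeSketch(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* creates At = A^T */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in-place, square A only */
  PetscFunctionReturn(0);
}
#endif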
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2080 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2081 {
2082   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2083   Mat            a = aij->A,b = aij->B;
2084   PetscErrorCode ierr;
2085   PetscInt       s1,s2,s3;
2086 
2087   PetscFunctionBegin;
2088   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2089   if (rr) {
2090     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2091     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2092     /* Overlap communication with computation. */
2093     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2094   }
2095   if (ll) {
2096     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2097     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2098     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2099   }
2100   /* scale  the diagonal block */
2101   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2102 
2103   if (rr) {
2104     /* Do a scatter end and then right scale the off-diagonal block */
2105     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2106     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2107   }
2108 
2109   PetscFunctionReturn(0);
2110 }
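
/*
   Usage sketch (compiled out): diagonal scaling A <- diag(l) A diag(r),
   driving the scatter-overlapped code above. MatGetVecs() returns a
   column-conforming right vector and a row-conforming left vector, which is
   exactly what MatDiagonalScale() expects. Helper name and the scale factors
   are hypothetical.
*/
#if 0
static PetscErrorCode ScaleSketch(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr); /* r conforms to columns, l to rows */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif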
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2114 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2115 {
2116   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2117   PetscErrorCode ierr;
2118 
2119   PetscFunctionBegin;
2120   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatEqual_MPIAIJ"
2126 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2127 {
2128   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2129   Mat            a,b,c,d;
2130   PetscBool      flg;
2131   PetscErrorCode ierr;
2132 
2133   PetscFunctionBegin;
2134   a = matA->A; b = matA->B;
2135   c = matB->A; d = matB->B;
2136 
2137   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2138   if (flg) {
2139     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2140   }
2141   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatCopy_MPIAIJ"
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* because of the column compression in the off-processor part of the matrix a->B,
2157        the number of columns in a->B and b->B may be different, hence we cannot call
2158        the MatCopy() directly on the two parts. If need be, we can provide a more
2159        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2160        then copying the submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatSetUp_MPIAIJ"
2171 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2172 {
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2182 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2184 {
2185   PetscInt          i,m=Y->rmap->N;
2186   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2187   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2188   const PetscInt    *xi = x->i,*yi = y->i;
2189 
2190   PetscFunctionBegin;
2191   /* Set the number of nonzeros in the new matrix */
2192   for(i=0; i<m; i++) {
2193     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2194     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2195     nnz[i] = 0;
2196     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2197       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2198       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2199       nnz[i]++;
2200     }
2201     for (; k<nzy; k++) nnz[i]++;
2202   }
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatAXPY_MPIAIJ"
2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2209 {
2210   PetscErrorCode ierr;
2211   PetscInt       i;
2212   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2213   PetscBLASInt   bnz,one=1;
2214   Mat_SeqAIJ     *x,*y;
2215 
2216   PetscFunctionBegin;
2217   if (str == SAME_NONZERO_PATTERN) {
2218     PetscScalar alpha = a;
2219     x = (Mat_SeqAIJ *)xx->A->data;
2220     y = (Mat_SeqAIJ *)yy->A->data;
2221     bnz = PetscBLASIntCast(x->nz);
2222     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2223     x = (Mat_SeqAIJ *)xx->B->data;
2224     y = (Mat_SeqAIJ *)yy->B->data;
2225     bnz = PetscBLASIntCast(x->nz);
2226     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2227   } else if (str == SUBSET_NONZERO_PATTERN) {
2228     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2229 
2230     x = (Mat_SeqAIJ *)xx->B->data;
2231     y = (Mat_SeqAIJ *)yy->B->data;
2232     if (y->xtoy && y->XtoY != xx->B) {
2233       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2234       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2235     }
2236     if (!y->xtoy) { /* get xtoy */
2237       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2238       y->XtoY = xx->B;
2239       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2240     }
2241     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2242   } else {
2243     Mat B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
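
/*
   Usage sketch (compiled out): the three code paths above are selected by
   the MatStructure argument of MatAXPY(). Helper name is hypothetical, and
   each call assumes the nonzero patterns of X and Y really are related as
   the flag claims.
*/
#if 0
static PetscErrorCode AXPYSketch(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      /* straight BLAS axpy on both blocks */
  ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);    /* uses the cached xtoy map for the B block */
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* repreallocates and replaces Y's header */
  PetscFunctionReturn(0);
}
#endif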
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatConjugate_MPIAIJ"
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatRealPart_MPIAIJ"
2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285 {
2286   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2291   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2298 {
2299   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2304   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #ifdef PETSC_HAVE_PBGL
2309 
2310 #include <boost/parallel/mpi/bsp_process_group.hpp>
2311 #include <boost/graph/distributed/ilu_default_graph.hpp>
2312 #include <boost/graph/distributed/ilu_0_block.hpp>
2313 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2314 #include <boost/graph/distributed/petsc/interface.hpp>
2315 #include <boost/multi_array.hpp>
2316 #include <boost/parallel/distributed_property_map.hpp>
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2320 /*
2321   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2322 */
2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2324 {
2325   namespace petsc = boost::distributed::petsc;
2326 
2327   namespace graph_dist = boost::graph::distributed;
2328   using boost::graph::distributed::ilu_default::process_group_type;
2329   using boost::graph::ilu_permuted;
2330 
2331   PetscBool       row_identity, col_identity;
2332   PetscContainer  c;
2333   PetscInt        m, n, M, N;
2334   PetscErrorCode  ierr;
2335 
2336   PetscFunctionBegin;
2337   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2338   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2339   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2340   if (!row_identity || !col_identity) {
2341     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2342   }
2343 
2344   process_group_type pg;
2345   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2346   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2347   lgraph_type&   level_graph = *lgraph_p;
2348   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2349 
2350   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2351   ilu_permuted(level_graph);
2352 
2353   /* put together the new matrix */
2354   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2355   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2356   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2357   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2358   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2359   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2360   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2362 
2363   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2364   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2365   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2366   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2372 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2373 {
2374   PetscFunctionBegin;
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatSolve_MPIAIJ"
2380 /*
2381   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2382 */
2383 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2384 {
2385   namespace graph_dist = boost::graph::distributed;
2386 
2387   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2388   lgraph_type*   lgraph_p;
2389   PetscContainer c;
2390   PetscErrorCode ierr;
2391 
2392   PetscFunctionBegin;
2393   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2394   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2395   ierr = VecCopy(b, x);CHKERRQ(ierr);
2396 
2397   PetscScalar* array_x;
2398   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2399   PetscInt sx;
2400   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2401 
2402   PetscScalar* array_b;
2403   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2404   PetscInt sb;
2405   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2406 
2407   lgraph_type&   level_graph = *lgraph_p;
2408   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2409 
2410   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2411   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2412                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2413 
2414   typedef boost::iterator_property_map<array_ref_type::iterator,
2415                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2416   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2417                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2418 
2419   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2420   ilu_set_solve(*lgraph_p, vector_b, vector_x);
       ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
       ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 #endif
2424 
2425 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2426   PetscInt       nzlocal,nsends,nrecvs;
2427   PetscMPIInt    *send_rank,*recv_rank;
2428   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2429   PetscScalar    *sbuf_a,**rbuf_a;
2430   PetscErrorCode (*Destroy)(Mat);
2431 } Mat_Redundant;
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2435 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2436 {
2437   PetscErrorCode       ierr;
2438   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2439   PetscInt             i;
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2443   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2444   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2445   for (i=0; i<redund->nrecvs; i++){
2446     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2447     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2448   }
2449   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2450   ierr = PetscFree(redund);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
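
/*
   Sketch (compiled out) of how a Mat_Redundant context is hung off a matrix
   with a PetscContainer so that the destroy routine above runs when the
   matrix itself is destroyed. Helper name is hypothetical; the pattern
   mirrors the PetscObjectCompose() usage elsewhere in this file.
*/
#if 0
static PetscErrorCode AttachRedundantSketch(Mat C,Mat_Redundant *redund)
{
  PetscErrorCode ierr;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = PetscContainerCreate(((PetscObject)C)->comm,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); /* the composed reference keeps it alive */
  PetscFunctionReturn(0);
}
#endif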
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatDestroy_MatRedundant"
2456 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2457 {
2458   PetscErrorCode  ierr;
2459   PetscContainer  container;
2460   Mat_Redundant   *redund=PETSC_NULL;
2461 
2462   PetscFunctionBegin;
2463   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2464   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2465   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2466   A->ops->destroy = redund->Destroy;
2467   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2468   if (A->ops->destroy) {
2469     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2476 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2477 {
2478   PetscMPIInt    rank,size;
2479   MPI_Comm       comm=((PetscObject)mat)->comm;
2480   PetscErrorCode ierr;
2481   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2482   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2483   PetscInt       *rowrange=mat->rmap->range;
2484   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2485   Mat            A=aij->A,B=aij->B,C=*matredundant;
2486   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2487   PetscScalar    *sbuf_a;
2488   PetscInt       nzlocal=a->nz+b->nz;
2489   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2490   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2491   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2492   MatScalar      *aworkA,*aworkB;
2493   PetscScalar    *vals;
2494   PetscMPIInt    tag1,tag2,tag3,imdex;
2495   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2496                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2497   MPI_Status     recv_status,*send_status;
2498   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2499   PetscInt       **rbuf_j=PETSC_NULL;
2500   PetscScalar    **rbuf_a=PETSC_NULL;
2501   Mat_Redundant  *redund=PETSC_NULL;
2502   PetscContainer container;
2503 
2504   PetscFunctionBegin;
2505   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2506   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2507 
2508   if (reuse == MAT_REUSE_MATRIX) {
2509     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2510     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2511     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2512     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2513     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2514     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2515     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2516     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2517 
2518     nsends    = redund->nsends;
2519     nrecvs    = redund->nrecvs;
2520     send_rank = redund->send_rank;
2521     recv_rank = redund->recv_rank;
2522     sbuf_nz   = redund->sbuf_nz;
2523     rbuf_nz   = redund->rbuf_nz;
2524     sbuf_j    = redund->sbuf_j;
2525     sbuf_a    = redund->sbuf_a;
2526     rbuf_j    = redund->rbuf_j;
2527     rbuf_a    = redund->rbuf_a;
2528   }
2529 
2530   if (reuse == MAT_INITIAL_MATRIX){
2531     PetscMPIInt  subrank,subsize;
2532     PetscInt     nleftover,np_subcomm;
2533     /* get the destination processors' id send_rank, nsends and nrecvs */
2534     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2535     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2536     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2537     np_subcomm = size/nsubcomm;
2538     nleftover  = size - nsubcomm*np_subcomm;
2539     nsends = 0; nrecvs = 0;
2540     for (i=0; i<size; i++){ /* i = rank */
2541       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2542         send_rank[nsends] = i; nsends++;
2543         recv_rank[nrecvs++] = i;
2544       }
2545     }
2546     if (rank >= size - nleftover){/* this proc is a leftover processor */
2547       i = size-nleftover-1;
2548       j = 0;
2549       while (j < nsubcomm - nleftover){
2550         send_rank[nsends++] = i;
2551         i--; j++;
2552       }
2553     }
2554 
2555     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2556       for (i=0; i<nleftover; i++){
2557         recv_rank[nrecvs++] = size-nleftover+i;
2558       }
2559     }
2560 
2561     /* allocate sbuf_j, sbuf_a */
2562     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2563     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2564     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2565   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2566 
2567   /* copy mat's local entries into the buffers */
2568   if (reuse == MAT_INITIAL_MATRIX){
2569     rownz_max = 0;
2570     rptr = sbuf_j;
2571     cols = sbuf_j + rend-rstart + 1;
2572     vals = sbuf_a;
2573     rptr[0] = 0;
2574     for (i=0; i<rend-rstart; i++){
2575       row = i + rstart;
2576       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2577       ncols  = nzA + nzB;
2578       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2579       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2580       /* load the column indices for this row into cols */
2581       lwrite = 0;
2582       for (l=0; l<nzB; l++) {
2583         if ((ctmp = bmap[cworkB[l]]) < cstart){
2584           vals[lwrite]   = aworkB[l];
2585           cols[lwrite++] = ctmp;
2586         }
2587       }
2588       for (l=0; l<nzA; l++){
2589         vals[lwrite]   = aworkA[l];
2590         cols[lwrite++] = cstart + cworkA[l];
2591       }
2592       for (l=0; l<nzB; l++) {
2593         if ((ctmp = bmap[cworkB[l]]) >= cend){
2594           vals[lwrite]   = aworkB[l];
2595           cols[lwrite++] = ctmp;
2596         }
2597       }
2598       vals += ncols;
2599       cols += ncols;
2600       rptr[i+1] = rptr[i] + ncols;
2601       if (rownz_max < ncols) rownz_max = ncols;
2602     }
2603     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2604   } else { /* only copy matrix values into sbuf_a */
2605     rptr = sbuf_j;
2606     vals = sbuf_a;
2607     rptr[0] = 0;
2608     for (i=0; i<rend-rstart; i++){
2609       row = i + rstart;
2610       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2611       ncols  = nzA + nzB;
2612       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2613       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2614       lwrite = 0;
2615       for (l=0; l<nzB; l++) {
2616         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2617       }
2618       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2619       for (l=0; l<nzB; l++) {
2620         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2621       }
2622       vals += ncols;
2623       rptr[i+1] = rptr[i] + ncols;
2624     }
2625   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2626 
2627   /* send nzlocal to others, and recv other's nzlocal */
2628   /*--------------------------------------------------*/
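  /* All MPI_Request arrays share one allocation: s_waits3 (sends of sbuf_a) is
     followed in memory by s_waits2, s_waits1, r_waits1, r_waits2 and r_waits3
     when building the matrix, and by r_waits3 alone when reusing it */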
2629   if (reuse == MAT_INITIAL_MATRIX){
2630     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2631     s_waits2 = s_waits3 + nsends;
2632     s_waits1 = s_waits2 + nsends;
2633     r_waits1 = s_waits1 + nsends;
2634     r_waits2 = r_waits1 + nrecvs;
2635     r_waits3 = r_waits2 + nrecvs;
2636   } else {
2637     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2638     r_waits3 = s_waits3 + nsends;
2639   }
2640 
2641   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2642   if (reuse == MAT_INITIAL_MATRIX){
2643     /* get new tags to keep the communication clean */
2644     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2645     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2646     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2647 
2648     /* post receives of other's nzlocal */
2649     for (i=0; i<nrecvs; i++){
2650       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2651     }
2652     /* send nzlocal to others */
2653     for (i=0; i<nsends; i++){
2654       sbuf_nz[i] = nzlocal;
2655       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2656     }
2657     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2658     count = nrecvs;
2659     while (count) {
2660       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2661       recv_rank[imdex] = recv_status.MPI_SOURCE;
2662       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2663       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2664 
2665       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2666       rbuf_nz[imdex] += i + 2;
2667       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2668       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2669       count--;
2670     }
2671     /* wait on sends of nzlocal */
2672     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2673     /* send mat->i,j to others, and recv from other's */
2674     /*------------------------------------------------*/
2675     for (i=0; i<nsends; i++){
2676       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2677       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2678     }
2679     /* wait on receives of mat->i,j */
2680     /*------------------------------*/
2681     count = nrecvs;
2682     while (count) {
2683       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2684       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2685       count--;
2686     }
2687     /* wait on sends of mat->i,j */
2688     /*---------------------------*/
2689     if (nsends) {
2690       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2691     }
2692   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2693 
2694   /* post receives, send and receive mat->a */
2695   /*----------------------------------------*/
2696   for (imdex=0; imdex<nrecvs; imdex++) {
2697     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2698   }
2699   for (i=0; i<nsends; i++){
2700     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2701   }
2702   count = nrecvs;
2703   while (count) {
2704     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2705     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2706     count--;
2707   }
2708   if (nsends) {
2709     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2710   }
2711 
2712   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2713 
2714   /* create redundant matrix */
2715   /*-------------------------*/
2716   if (reuse == MAT_INITIAL_MATRIX){
2717     /* compute rownz_max for preallocation */
2718     for (imdex=0; imdex<nrecvs; imdex++){
2719       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2720       rptr = rbuf_j[imdex];
2721       for (i=0; i<j; i++){
2722         ncols = rptr[i+1] - rptr[i];
2723         if (rownz_max < ncols) rownz_max = ncols;
2724       }
2725     }
2726 
2727     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2728     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2729     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2730     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2731     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2732     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2733   } else {
2734     C = *matredundant;
2735   }
2736 
2737   /* insert local matrix entries */
2738   rptr = sbuf_j;
2739   cols = sbuf_j + rend-rstart + 1;
2740   vals = sbuf_a;
2741   for (i=0; i<rend-rstart; i++){
2742     row   = i + rstart;
2743     ncols = rptr[i+1] - rptr[i];
2744     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2745     vals += ncols;
2746     cols += ncols;
2747   }
2748   /* insert received matrix entries */
2749   for (imdex=0; imdex<nrecvs; imdex++){
2750     rstart = rowrange[recv_rank[imdex]];
2751     rend   = rowrange[recv_rank[imdex]+1];
2752     rptr = rbuf_j[imdex];
2753     cols = rbuf_j[imdex] + rend-rstart + 1;
2754     vals = rbuf_a[imdex];
2755     for (i=0; i<rend-rstart; i++){
2756       row   = i + rstart;
2757       ncols = rptr[i+1] - rptr[i];
2758       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2759       vals += ncols;
2760       cols += ncols;
2761     }
2762   }
2763   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2764   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2765   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2766   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2767   if (reuse == MAT_INITIAL_MATRIX) {
2768     PetscContainer container;
2769     *matredundant = C;
2770     /* create a supporting struct and attach it to C for reuse */
2771     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2772     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2773     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2774     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2775     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2776     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2777 
2778     redund->nzlocal = nzlocal;
2779     redund->nsends  = nsends;
2780     redund->nrecvs  = nrecvs;
2781     redund->send_rank = send_rank;
2782     redund->recv_rank = recv_rank;
2783     redund->sbuf_nz = sbuf_nz;
2784     redund->rbuf_nz = rbuf_nz;
2785     redund->sbuf_j  = sbuf_j;
2786     redund->sbuf_a  = sbuf_a;
2787     redund->rbuf_j  = rbuf_j;
2788     redund->rbuf_a  = rbuf_a;
2789 
2790     redund->Destroy = C->ops->destroy;
2791     C->ops->destroy = MatDestroy_MatRedundant;
2792   }
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2798 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2799 {
2800   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2801   PetscErrorCode ierr;
2802   PetscInt       i,*idxb = 0;
2803   PetscScalar    *va,*vb;
2804   Vec            vtmp;
2805 
2806   PetscFunctionBegin;
2807   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2808   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2809   if (idx) {
2810     for (i=0; i<A->rmap->n; i++) {
2811       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2812     }
2813   }
2814 
2815   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2816   if (idx) {
2817     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2818   }
2819   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2820   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2821 
2822   for (i=0; i<A->rmap->n; i++){
2823     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2824       va[i] = vb[i];
2825       if (idx) idx[i] = a->garray[idxb[i]];
2826     }
2827   }
2828 
2829   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2830   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2831   ierr = PetscFree(idxb);CHKERRQ(ierr);
2832   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2838 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2839 {
2840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2841   PetscErrorCode ierr;
2842   PetscInt       i,*idxb = 0;
2843   PetscScalar    *va,*vb;
2844   Vec            vtmp;
2845 
2846   PetscFunctionBegin;
2847   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2848   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2849   if (idx) {
2850     for (i=0; i<A->rmap->n; i++) {
2851       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2852     }
2853   }
2854 
2855   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2856   if (idx) {
2857     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2858   }
2859   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2860   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2861 
2862   for (i=0; i<A->rmap->n; i++){
2863     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2864       va[i] = vb[i];
2865       if (idx) idx[i] = a->garray[idxb[i]];
2866     }
2867   }
2868 
2869   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2870   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2871   ierr = PetscFree(idxb);CHKERRQ(ierr);
2872   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 #undef __FUNCT__
2877 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2878 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2879 {
2880   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2881   PetscInt       n      = A->rmap->n;
2882   PetscInt       cstart = A->cmap->rstart;
2883   PetscInt      *cmap   = mat->garray;
2884   PetscInt      *diagIdx, *offdiagIdx;
2885   Vec            diagV, offdiagV;
2886   PetscScalar   *a, *diagA, *offdiagA;
2887   PetscInt       r;
2888   PetscErrorCode ierr;
2889 
2890   PetscFunctionBegin;
2891   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2892   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2893   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2894   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2895   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2896   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2897   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2898   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2899   for (r = 0; r < n; ++r) {
2900     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2901       a[r]   = diagA[r];
2902       idx[r] = cstart + diagIdx[r];
2903     } else {
2904       a[r]   = offdiagA[r];
2905       idx[r] = cmap[offdiagIdx[r]];
2906     }
2907   }
2908   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2909   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2910   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2911   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2912   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2913   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 #undef __FUNCT__
2918 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2919 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2920 {
2921   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2922   PetscInt       n      = A->rmap->n;
2923   PetscInt       cstart = A->cmap->rstart;
2924   PetscInt      *cmap   = mat->garray;
2925   PetscInt      *diagIdx, *offdiagIdx;
2926   Vec            diagV, offdiagV;
2927   PetscScalar   *a, *diagA, *offdiagA;
2928   PetscInt       r;
2929   PetscErrorCode ierr;
2930 
2931   PetscFunctionBegin;
2932   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2933   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2934   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2935   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2936   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2937   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2938   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2939   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2940   for (r = 0; r < n; ++r) {
2941     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2942       a[r]   = diagA[r];
2943       idx[r] = cstart + diagIdx[r];
2944     } else {
2945       a[r]   = offdiagA[r];
2946       idx[r] = cmap[offdiagIdx[r]];
2947     }
2948   }
2949   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2950   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2951   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2952   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2953   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2954   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2955   PetscFunctionReturn(0);
2956 }
2957 
2958 #undef __FUNCT__
2959 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2960 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2961 {
2962   PetscErrorCode ierr;
2963   Mat            *dummy;
2964 
2965   PetscFunctionBegin;
2966   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2967   *newmat = *dummy;
2968   ierr = PetscFree(dummy);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2973 
2974 #undef __FUNCT__
2975 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2976 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2977 {
2978   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2979   PetscErrorCode ierr;
2980 
2981   PetscFunctionBegin;
2982   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2983   PetscFunctionReturn(0);
2984 }
2985 
2986 
2987 /* -------------------------------------------------------------------*/
2988 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2989        MatGetRow_MPIAIJ,
2990        MatRestoreRow_MPIAIJ,
2991        MatMult_MPIAIJ,
2992 /* 4*/ MatMultAdd_MPIAIJ,
2993        MatMultTranspose_MPIAIJ,
2994        MatMultTransposeAdd_MPIAIJ,
2995 #ifdef PETSC_HAVE_PBGL
2996        MatSolve_MPIAIJ,
2997 #else
2998        0,
2999 #endif
3000        0,
3001        0,
3002 /*10*/ 0,
3003        0,
3004        0,
3005        MatSOR_MPIAIJ,
3006        MatTranspose_MPIAIJ,
3007 /*15*/ MatGetInfo_MPIAIJ,
3008        MatEqual_MPIAIJ,
3009        MatGetDiagonal_MPIAIJ,
3010        MatDiagonalScale_MPIAIJ,
3011        MatNorm_MPIAIJ,
3012 /*20*/ MatAssemblyBegin_MPIAIJ,
3013        MatAssemblyEnd_MPIAIJ,
3014        MatSetOption_MPIAIJ,
3015        MatZeroEntries_MPIAIJ,
3016 /*24*/ MatZeroRows_MPIAIJ,
3017        0,
3018 #ifdef PETSC_HAVE_PBGL
3019        0,
3020 #else
3021        0,
3022 #endif
3023        0,
3024        0,
3025 /*29*/ MatSetUp_MPIAIJ,
3026 #ifdef PETSC_HAVE_PBGL
3027        0,
3028 #else
3029        0,
3030 #endif
3031        0,
3032        0,
3033        0,
3034 /*34*/ MatDuplicate_MPIAIJ,
3035        0,
3036        0,
3037        0,
3038        0,
3039 /*39*/ MatAXPY_MPIAIJ,
3040        MatGetSubMatrices_MPIAIJ,
3041        MatIncreaseOverlap_MPIAIJ,
3042        MatGetValues_MPIAIJ,
3043        MatCopy_MPIAIJ,
3044 /*44*/ MatGetRowMax_MPIAIJ,
3045        MatScale_MPIAIJ,
3046        0,
3047        0,
3048        MatZeroRowsColumns_MPIAIJ,
3049 /*49*/ 0,
3050        0,
3051        0,
3052        0,
3053        0,
3054 /*54*/ MatFDColoringCreate_MPIAIJ,
3055        0,
3056        MatSetUnfactored_MPIAIJ,
3057        0, /* MatPermute_MPIAIJ, impl currently broken */
3058        0,
3059 /*59*/ MatGetSubMatrix_MPIAIJ,
3060        MatDestroy_MPIAIJ,
3061        MatView_MPIAIJ,
3062        0,
3063        0,
3064 /*64*/ 0,
3065        0,
3066        0,
3067        0,
3068        0,
3069 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3070        MatGetRowMinAbs_MPIAIJ,
3071        0,
3072        MatSetColoring_MPIAIJ,
3073 #if defined(PETSC_HAVE_ADIC)
3074        MatSetValuesAdic_MPIAIJ,
3075 #else
3076        0,
3077 #endif
3078        MatSetValuesAdifor_MPIAIJ,
3079 /*75*/ MatFDColoringApply_AIJ,
3080        0,
3081        0,
3082        0,
3083        0,
3084 /*80*/ 0,
3085        0,
3086        0,
3087 /*83*/ MatLoad_MPIAIJ,
3088        0,
3089        0,
3090        0,
3091        0,
3092        0,
3093 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3094        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3095        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3096        MatPtAP_Basic,
3097        MatPtAPSymbolic_MPIAIJ,
3098 /*94*/ MatPtAPNumeric_MPIAIJ,
3099        0,
3100        0,
3101        0,
3102        0,
3103 /*99*/ 0,
3104        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3105        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3106        MatConjugate_MPIAIJ,
3107        0,
3108 /*104*/MatSetValuesRow_MPIAIJ,
3109        MatRealPart_MPIAIJ,
3110        MatImaginaryPart_MPIAIJ,
3111        0,
3112        0,
3113 /*109*/0,
3114        MatGetRedundantMatrix_MPIAIJ,
3115        MatGetRowMin_MPIAIJ,
3116        0,
3117        0,
3118 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3119        0,
3120        0,
3121        0,
3122        0,
3123 /*119*/0,
3124        0,
3125        0,
3126        0,
3127        MatGetMultiProcBlock_MPIAIJ,
3128 /*124*/MatFindNonZeroRows_MPIAIJ,
3129        MatGetColumnNorms_MPIAIJ,
3130        MatInvertBlockDiagonal_MPIAIJ,
3131        0,
3132        MatGetSubMatricesParallel_MPIAIJ,
3133 /*129*/0,
3134        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3135        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3136        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3137        0,
3138 /*134*/0,
3139        0,
3140        0,
3141        0,
3142        0
3143 };
3144 
3145 /* ----------------------------------------------------------------------------------------*/
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3150 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3151 {
3152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3157   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 EXTERN_C_END
3161 
3162 EXTERN_C_BEGIN
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3165 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3166 {
3167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3168   PetscErrorCode ierr;
3169 
3170   PetscFunctionBegin;
3171   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3172   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 EXTERN_C_END
3176 
3177 EXTERN_C_BEGIN
3178 #undef __FUNCT__
3179 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3180 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3181 {
3182   Mat_MPIAIJ     *b;
3183   PetscErrorCode ierr;
3184   PetscInt       i;
3185   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3186 
3187   PetscFunctionBegin;
3188   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3189   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3190   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3191   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3192   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3193   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3194 
3195   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3196   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3197   if (d_nnz) {
3198     for (i=0; i<B->rmap->n; i++) {
3199       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3200     }
3201   }
3202   if (o_nnz) {
3203     for (i=0; i<B->rmap->n; i++) {
3204       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3205     }
3206   }
3207   b = (Mat_MPIAIJ*)B->data;
3208 
3209   if (!B->preallocated) {
3210     /* Explicitly create 2 MATSEQAIJ matrices. */
3211     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3212     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3213     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3214     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3215     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3216     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3217     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3218     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3219     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3220     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3221   }
3222 
3223   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3224   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3225   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3226   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3227   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3228   B->preallocated = PETSC_TRUE;
3229   PetscFunctionReturn(0);
3230 }
3231 EXTERN_C_END
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3235 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3236 {
3237   Mat            mat;
3238   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3239   PetscErrorCode ierr;
3240 
3241   PetscFunctionBegin;
3242   *newmat       = 0;
3243   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3244   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3245   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3246   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3247   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3248   a    = (Mat_MPIAIJ*)mat->data;
3249 
3250   mat->factortype    = matin->factortype;
3251   mat->rmap->bs      = matin->rmap->bs;
3252   mat->cmap->bs      = matin->cmap->bs;
3253   mat->assembled    = PETSC_TRUE;
3254   mat->insertmode   = NOT_SET_VALUES;
3255   mat->preallocated = PETSC_TRUE;
3256 
3257   a->size           = oldmat->size;
3258   a->rank           = oldmat->rank;
3259   a->donotstash     = oldmat->donotstash;
3260   a->roworiented    = oldmat->roworiented;
3261   a->rowindices     = 0;
3262   a->rowvalues      = 0;
3263   a->getrowactive   = PETSC_FALSE;
3264 
3265   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3266   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3267 
3268   if (oldmat->colmap) {
3269 #if defined (PETSC_USE_CTABLE)
3270     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3271 #else
3272     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3273     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3274     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3275 #endif
3276   } else a->colmap = 0;
3277   if (oldmat->garray) {
3278     PetscInt len;
3279     len  = oldmat->B->cmap->n;
3280     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3281     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3282     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3283   } else a->garray = 0;
3284 
3285   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3286   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3287   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3288   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3289   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3290   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3291   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3292   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3293   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3294   *newmat = mat;
3295   PetscFunctionReturn(0);
3296 }
3297 
3298 
3299 
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatLoad_MPIAIJ"
3302 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3303 {
3304   PetscScalar    *vals,*svals;
3305   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3306   PetscErrorCode ierr;
3307   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3308   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3309   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3310   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3311   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3312   int            fd;
3313 
3314   PetscFunctionBegin;
3315   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3316   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3317   if (!rank) {
3318     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3319     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3320     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3321   }
3322 
3323   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3324 
3325   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3326   M = header[1]; N = header[2];
3327   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3328   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3329   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3330 
3331   /* If global sizes are set, check if they are consistent with that given in the file */
3332   if (sizesset) {
3333     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3334   }
3335   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3336   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3337 
3338   /* determine ownership of all rows */
3339   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3340   else m = newMat->rmap->n; /* Set by user */
3341 
3342   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3343   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3344 
3345   /* First process needs enough room for process with most rows */
3346   if (!rank) {
3347     mmax       = rowners[1];
3348     for (i=2; i<=size; i++) {
3349       mmax = PetscMax(mmax,rowners[i]);
3350     }
3351   } else mmax = m;
3352 
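  /* turn the per-process row counts gathered into rowners[1..size] into a prefix
     sum, so that rowners[i] becomes the first global row owned by process i */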
3353   rowners[0] = 0;
3354   for (i=2; i<=size; i++) {
3355     rowners[i] += rowners[i-1];
3356   }
3357   rstart = rowners[rank];
3358   rend   = rowners[rank+1];
3359 
3360   /* distribute row lengths to all processors */
3361   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3362   if (!rank) {
3363     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3364     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3365     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3366     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3367     for (j=0; j<m; j++) {
3368       procsnz[0] += ourlens[j];
3369     }
3370     for (i=1; i<size; i++) {
3371       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3372       /* calculate the number of nonzeros on each processor */
3373       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3374         procsnz[i] += rowlengths[j];
3375       }
3376       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3377     }
3378     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3379   } else {
3380     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3381   }
3382 
3383   if (!rank) {
3384     /* determine max buffer needed and allocate it */
3385     maxnz = 0;
3386     for (i=0; i<size; i++) {
3387       maxnz = PetscMax(maxnz,procsnz[i]);
3388     }
3389     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3390 
3391     /* read in my part of the matrix column indices  */
3392     nz   = procsnz[0];
3393     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3394     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3395 
3396     /* read in everyone else's and ship off */
3397     for (i=1; i<size; i++) {
3398       nz     = procsnz[i];
3399       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3400       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3401     }
3402     ierr = PetscFree(cols);CHKERRQ(ierr);
3403   } else {
3404     /* determine buffer space needed for message */
3405     nz = 0;
3406     for (i=0; i<m; i++) {
3407       nz += ourlens[i];
3408     }
3409     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3410 
3411     /* receive message of column indices*/
3412     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3413   }
3414 
3415   /* determine column ownership if matrix is not square */
3416   if (N != M) {
3417     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3418     else n = newMat->cmap->n;
3419     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3420     cstart = cend - n;
3421   } else {
3422     cstart = rstart;
3423     cend   = rend;
3424     n      = cend - cstart;
3425   }
3426 
3427   /* loop over local rows, determining number of off diagonal entries */
3428   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3429   jj = 0;
3430   for (i=0; i<m; i++) {
3431     for (j=0; j<ourlens[i]; j++) {
3432       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3433       jj++;
3434     }
3435   }
3436 
3437   for (i=0; i<m; i++) {
3438     ourlens[i] -= offlens[i];
3439   }
3440   if (!sizesset) {
3441     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3442   }
3443   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3444 
3445   for (i=0; i<m; i++) {
3446     ourlens[i] += offlens[i];
3447   }
3448 
3449   if (!rank) {
3450     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3451 
3452     /* read in my part of the matrix numerical values  */
3453     nz   = procsnz[0];
3454     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3455 
3456     /* insert into matrix */
3457     jj      = rstart;
3458     smycols = mycols;
3459     svals   = vals;
3460     for (i=0; i<m; i++) {
3461       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3462       smycols += ourlens[i];
3463       svals   += ourlens[i];
3464       jj++;
3465     }
3466 
3467     /* read in other processors and ship out */
3468     for (i=1; i<size; i++) {
3469       nz     = procsnz[i];
3470       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3471       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3472     }
3473     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3474   } else {
3475     /* receive numeric values */
3476     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3477 
3478     /* receive message of values*/
3479     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3480 
3481     /* insert into matrix */
3482     jj      = rstart;
3483     smycols = mycols;
3484     svals   = vals;
3485     for (i=0; i<m; i++) {
3486       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3487       smycols += ourlens[i];
3488       svals   += ourlens[i];
3489       jj++;
3490     }
3491   }
3492   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3493   ierr = PetscFree(vals);CHKERRQ(ierr);
3494   ierr = PetscFree(mycols);CHKERRQ(ierr);
3495   ierr = PetscFree(rowners);CHKERRQ(ierr);
3496 
3497   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3498   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3499   PetscFunctionReturn(0);
3500 }
3501 
3502 #undef __FUNCT__
3503 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3504 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3505 {
3506   PetscErrorCode ierr;
3507   IS             iscol_local;
3508   PetscInt       csize;
3509 
3510   PetscFunctionBegin;
3511   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3512   if (call == MAT_REUSE_MATRIX) {
3513     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3514     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3515   } else {
3516     PetscInt cbs;
3517     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3518     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3519     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3520   }
3521   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3522   if (call == MAT_INITIAL_MATRIX) {
3523     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3524     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3525   }
3526   PetscFunctionReturn(0);
3527 }
3528 
3529 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3530 #undef __FUNCT__
3531 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3532 /*
3533     Not great since it makes two copies of the submatrix: first a SeqAIJ
3534   locally, and then the end result by concatenating the local matrices.
3535   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3536 
3537   Note: This requires a sequential iscol with all indices.
3538 */
3539 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3540 {
3541   PetscErrorCode ierr;
3542   PetscMPIInt    rank,size;
3543   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3544   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3545   PetscBool      allcolumns, colflag;
3546   Mat            M,Mreuse;
3547   MatScalar      *vwork,*aa;
3548   MPI_Comm       comm = ((PetscObject)mat)->comm;
3549   Mat_SeqAIJ     *aij;
3550 
3551 
3552   PetscFunctionBegin;
3553   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3554   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3555 
3556   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3557   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3558   if (colflag && ncol == mat->cmap->N){
3559     allcolumns = PETSC_TRUE;
3560   } else {
3561     allcolumns = PETSC_FALSE;
3562   }
3563   if (call ==  MAT_REUSE_MATRIX) {
3564     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3565     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3566     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3567   } else {
3568     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3569   }
3570 
3571   /*
3572       m - number of local rows
3573       n - number of columns (same on all processors)
3574       rstart - first row in new global matrix generated
3575   */
3576   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3577   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3578   if (call == MAT_INITIAL_MATRIX) {
3579     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3580     ii  = aij->i;
3581     jj  = aij->j;
3582 
3583     /*
3584         Determine the number of non-zeros in the diagonal and off-diagonal
3585         portions of the matrix in order to do correct preallocation
3586     */
3587 
3588     /* first get start and end of "diagonal" columns */
3589     if (csize == PETSC_DECIDE) {
3590       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3591       if (mglobal == n) { /* square matrix */
3592         nlocal = m;
3593       } else {
3594         nlocal = n/size + ((n % size) > rank);
3595       }
3596     } else {
3597       nlocal = csize;
3598     }
3599     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3600     rstart = rend - nlocal;
3601     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3602 
3603     /* next, compute all the lengths */
3604     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3605     olens = dlens + m;
3606     for (i=0; i<m; i++) {
3607       jend = ii[i+1] - ii[i];
3608       olen = 0;
3609       dlen = 0;
3610       for (j=0; j<jend; j++) {
3611         if (*jj < rstart || *jj >= rend) olen++;
3612         else dlen++;
3613         jj++;
3614       }
3615       olens[i] = olen;
3616       dlens[i] = dlen;
3617     }
3618     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3619     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3620     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3621     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3622     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3623     ierr = PetscFree(dlens);CHKERRQ(ierr);
3624   } else {
3625     PetscInt ml,nl;
3626 
3627     M = *newmat;
3628     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3629     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3630     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3631     /*
3632          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3633        rather than the slower MatSetValues().
3634     */
3635     M->was_assembled = PETSC_TRUE;
3636     M->assembled     = PETSC_FALSE;
3637   }
3638   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3639   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3640   ii  = aij->i;
3641   jj  = aij->j;
3642   aa  = aij->a;
3643   for (i=0; i<m; i++) {
3644     row   = rstart + i;
3645     nz    = ii[i+1] - ii[i];
3646     cwork = jj;     jj += nz;
3647     vwork = aa;     aa += nz;
3648     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3649   }
3650 
3651   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3652   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3653   *newmat = M;
3654 
3655   /* save submatrix used in processor for next request */
3656   if (call ==  MAT_INITIAL_MATRIX) {
3657     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3658     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3659   }
3660 
3661   PetscFunctionReturn(0);
3662 }
3663 
3664 EXTERN_C_BEGIN
3665 #undef __FUNCT__
3666 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3667 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3668 {
3669   PetscInt       m,cstart, cend,j,nnz,i,d;
3670   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3671   const PetscInt *JJ;
3672   PetscScalar    *values;
3673   PetscErrorCode ierr;
3674 
3675   PetscFunctionBegin;
3676   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3677 
3678   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3679   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3680   m      = B->rmap->n;
3681   cstart = B->cmap->rstart;
3682   cend   = B->cmap->rend;
3683   rstart = B->rmap->rstart;
3684 
3685   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3686 
3687 #if defined(PETSC_USE_DEBUG)
3688   for (i=0; i<m; i++) {
3689     nnz     = Ii[i+1]- Ii[i];
3690     JJ      = J + Ii[i];
3691     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3692     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3693     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3694   }
3695 #endif
3696 
3697   for (i=0; i<m; i++) {
3698     nnz     = Ii[i+1]- Ii[i];
3699     JJ      = J + Ii[i];
3700     nnz_max = PetscMax(nnz_max,nnz);
3701     d       = 0;
3702     for (j=0; j<nnz; j++) {
3703       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3704     }
3705     d_nnz[i] = d;
3706     o_nnz[i] = nnz - d;
3707   }
3708   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3709   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3710 
3711   if (v) values = (PetscScalar*)v;
3712   else {
3713     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3714     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3715   }
3716 
3717   for (i=0; i<m; i++) {
3718     ii   = i + rstart;
3719     nnz  = Ii[i+1]- Ii[i];
3720     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3721   }
3722   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3723   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3724 
3725   if (!v) {
3726     ierr = PetscFree(values);CHKERRQ(ierr);
3727   }
3728   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3729   PetscFunctionReturn(0);
3730 }
3731 EXTERN_C_END
3732 
3733 #undef __FUNCT__
3734 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3735 /*@
3736    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3737    (the default parallel PETSc format).
3738 
3739    Collective on MPI_Comm
3740 
3741    Input Parameters:
3742 +  B - the matrix
3743 .  i - the indices into j for the start of each local row (starts with zero)
3744 .  j - the column indices for each local row (starts with zero)
3745 -  v - optional values in the matrix
3746 
3747    Level: developer
3748 
3749    Notes:
3750        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3751      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3752      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3753 
3754        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3755 
3756        The format used for the sparse matrix input is equivalent to a
3757     row-major ordering, i.e., for the following matrix, the input data expected is
3758     as shown:
3759 
3760         1 0 0
3761         2 0 3     P0
3762        -------
3763         4 5 6     P1
3764 
3765      Process0 [P0]: rows_owned=[0,1]
3766         i =  {0,1,3}  [size = nrow+1  = 2+1]
3767         j =  {0,0,2}  [size = nz = 3]
3768         v =  {1,2,3}  [size = nz = 3]
3769 
3770      Process1 [P1]: rows_owned=[2]
3771         i =  {0,3}    [size = nrow+1  = 1+1]
3772         j =  {0,1,2}  [size = nz = 3]
3773         v =  {4,5,6}  [size = nz = 3]
3774 
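     For illustration, a minimal sketch of the calls process 0 makes for the
   example above (error checking omitted; comm is the communicator holding
   both processes):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};   /* row pointers for the 2 local rows */
     PetscInt    j[] = {0,0,2};   /* global column indices             */
     PetscScalar v[] = {1,2,3};   /* values, ordered as in j           */

     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
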
3775 .keywords: matrix, aij, compressed row, sparse, parallel
3776 
3777 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3778           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3779 @*/
3780 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3781 {
3782   PetscErrorCode ierr;
3783 
3784   PetscFunctionBegin;
3785   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3786   PetscFunctionReturn(0);
3787 }
3788 
3789 #undef __FUNCT__
3790 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3791 /*@C
3792    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3793    (the default parallel PETSc format).  For good matrix assembly performance
3794    the user should preallocate the matrix storage by setting the parameters
3795    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3796    performance can be increased by more than a factor of 50.
3797 
3798    Collective on MPI_Comm
3799 
3800    Input Parameters:
3801 +  A - the matrix
3802 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3803            (same value is used for all local rows)
3804 .  d_nnz - array containing the number of nonzeros in the various rows of the
3805            DIAGONAL portion of the local submatrix (possibly different for each row)
3806            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3807            The size of this array is equal to the number of local rows, i.e 'm'.
3808            For matrices that will be factored, you must leave room for (and set)
3809            the diagonal entry even if it is zero.
3810 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3811            submatrix (same value is used for all local rows).
3812 -  o_nnz - array containing the number of nonzeros in the various rows of the
3813            OFF-DIAGONAL portion of the local submatrix (possibly different for
3814            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3815            structure. The size of this array is equal to the number
3816            of local rows, i.e 'm'.
3817 
3818    If the *_nnz parameter is given then the *_nz parameter is ignored
3819 
3820    The AIJ format (also called the Yale sparse matrix format or
3821    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3822    storage.  The stored row and column indices begin with zero.
3823    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3824 
3825    The parallel matrix is partitioned such that the first m0 rows belong to
3826    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3827    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3828 
3829    The DIAGONAL portion of the local submatrix of a processor can be defined
3830    as the submatrix obtained by extracting the part corresponding to
3831    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3832    first row that belongs to the processor, r2 is the last row belonging to
3833    this processor, and c1-c2 is the range of indices of the local part of a
3834    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3835    common case of a square matrix, the row and column ranges are the same and
3836    the DIAGONAL part is also square. The remaining portion of the local
3837    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3838 
3839    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3840 
3841    You can call MatGetInfo() to get information on how effective the preallocation was;
3842    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3843    You can also run with the option -info and look for messages with the string
3844    malloc in them to see if additional memory allocation was needed.
3845 
3846    Example usage:
3847 
3848    Consider the following 8x8 matrix with 34 non-zero values, that is
3849    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3850    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3851    as follows:
3852 
3853 .vb
3854             1  2  0  |  0  3  0  |  0  4
3855     Proc0   0  5  6  |  7  0  0  |  8  0
3856             9  0 10  | 11  0  0  | 12  0
3857     -------------------------------------
3858            13  0 14  | 15 16 17  |  0  0
3859     Proc1   0 18  0  | 19 20 21  |  0  0
3860             0  0  0  | 22 23  0  | 24  0
3861     -------------------------------------
3862     Proc2  25 26 27  |  0  0 28  | 29  0
3863            30  0  0  | 31 32 33  |  0 34
3864 .ve
3865 
3866    This can be represented as a collection of submatrices as:
3867 
3868 .vb
3869       A B C
3870       D E F
3871       G H I
3872 .ve
3873 
3874    Where the submatrices A,B,C are owned by proc0, D,E,F are
3875    owned by proc1, G,H,I are owned by proc2.
3876 
3877    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3878    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3879    The 'M','N' parameters are 8,8, and have the same values on all procs.
3880 
3881    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3882    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3883    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3884    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3885    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3886    matrix, and [DF] as another SeqAIJ matrix.
3887 
3888    When d_nz, o_nz parameters are specified, d_nz storage elements are
3889    allocated for every row of the local diagonal submatrix, and o_nz
3890    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3891    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3892    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3893    In this case, the values of d_nz,o_nz are:
3894 .vb
3895      proc0 : d_nz = 2, o_nz = 2
3896      proc1 : d_nz = 3, o_nz = 2
3897      proc2 : d_nz = 1, o_nz = 4
3898 .ve
3899    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3900    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3901    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3902    34 values.
3903 
3904    When d_nnz, o_nnz parameters are specified, the storage is specified
3905    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3906    In the above case the values for d_nnz,o_nnz are:
3907 .vb
3908      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3909      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3910      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3911 .ve
3912    Here the space allocated is the sum of all the above values, i.e., 34, and
3913    hence pre-allocation is perfect.
3914 
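   For illustration, a minimal sketch of the corresponding calls on proc1 in the
   example above (error checking omitted; the other processes pass their own
   local sizes and count arrays):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreate(comm,&A);
     MatSetSizes(A,3,PETSC_DECIDE,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
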
3915    Level: intermediate
3916 
3917 .keywords: matrix, aij, compressed row, sparse, parallel
3918 
3919 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3920           MPIAIJ, MatGetInfo()
3921 @*/
3922 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3923 {
3924   PetscErrorCode ierr;
3925 
3926   PetscFunctionBegin;
3927   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3928   PetscValidType(B,1);
3929   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3930   PetscFunctionReturn(0);
3931 }
3932 
3933 #undef __FUNCT__
3934 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3935 /*@
3936      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3937          CSR format the local rows.
3938 
3939    Collective on MPI_Comm
3940 
3941    Input Parameters:
3942 +  comm - MPI communicator
3943 .  m - number of local rows (Cannot be PETSC_DECIDE)
3944 .  n - This value should be the same as the local size used in creating the
3945        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3946        calculated if N is given) For square matrices n is almost always m.
3947 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3948 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3949 .   i - row indices
3950 .   j - column indices
3951 -   a - matrix values
3952 
3953    Output Parameter:
3954 .   mat - the matrix
3955 
3956    Level: intermediate
3957 
3958    Notes:
3959        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3960      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3961      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3962 
3963        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3964 
3965        The format used for the sparse matrix input is equivalent to a
3966     row-major ordering, i.e., for the following matrix, the input data expected is
3967     as shown:
3968 
3969         1 0 0
3970         2 0 3     P0
3971        -------
3972         4 5 6     P1
3973 
3974      Process0 [P0]: rows_owned=[0,1]
3975         i =  {0,1,3}  [size = nrow+1  = 2+1]
3976         j =  {0,0,2}  [size = nz = 3]
3977         v =  {1,2,3}  [size = nz = 3]
3978 
3979      Process1 [P1]: rows_owned=[2]
3980         i =  {0,3}    [size = nrow+1  = 1+1]
3981         j =  {0,1,2}  [size = nz = 3]
3982         v =  {4,5,6}  [size = nz = 3]
3983 
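     For illustration, a minimal sketch of the call process 0 makes for the
   example above (error checking omitted):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
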
3984 .keywords: matrix, aij, compressed row, sparse, parallel
3985 
3986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3987           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3988 @*/
3989 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3990 {
3991   PetscErrorCode ierr;
3992 
3993   PetscFunctionBegin;
3994   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3995   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3996   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3997   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3998   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
3999   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4000   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4001   PetscFunctionReturn(0);
4002 }
4003 
4004 #undef __FUNCT__
4005 #define __FUNCT__ "MatCreateAIJ"
4006 /*@C
4007    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4008    (the default parallel PETSc format).  For good matrix assembly performance
4009    the user should preallocate the matrix storage by setting the parameters
4010    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4011    performance can be increased by more than a factor of 50.
4012 
4013    Collective on MPI_Comm
4014 
4015    Input Parameters:
4016 +  comm - MPI communicator
4017 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4018            This value should be the same as the local size used in creating the
4019            y vector for the matrix-vector product y = Ax.
4020 .  n - This value should be the same as the local size used in creating the
4021        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4022        calculated if N is given). For square matrices n is almost always m.
4023 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4024 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4025 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4026            (same value is used for all local rows)
4027 .  d_nnz - array containing the number of nonzeros in the various rows of the
4028            DIAGONAL portion of the local submatrix (possibly different for each row)
4029            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4030            The size of this array is equal to the number of local rows, i.e. 'm'.
4031 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4032            submatrix (same value is used for all local rows).
4033 -  o_nnz - array containing the number of nonzeros in the various rows of the
4034            OFF-DIAGONAL portion of the local submatrix (possibly different for
4035            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4036            structure. The size of this array is equal to the number
4037            of local rows, i.e. 'm'.
4038 
4039    Output Parameter:
4040 .  A - the matrix
4041 
4042    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4043    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4044    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4045 
4046    Notes:
4047    If the *_nnz parameter is given then the *_nz parameter is ignored
4048 
4049    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4050    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4051    storage requirements for this matrix.
4052 
4053    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4054    processor then it must be used on all processors that share the object for
4055    that argument.
4056 
4057    The user MUST specify either the local or global matrix dimensions
4058    (possibly both).
4059 
4060    The parallel matrix is partitioned across processors such that the
4061    first m0 rows belong to process 0, the next m1 rows belong to
4062    process 1, the next m2 rows belong to process 2, etc., where
4063    m0,m1,m2,.. are given by the input parameter 'm' on each process; i.e., each
4064    processor stores values corresponding to an [m x N] submatrix.
4065 
4066    The columns are logically partitioned with the n0 columns belonging
4067    to the 0th partition, the next n1 columns belonging to the next
4068    partition, etc., where n0,n1,n2... are given by the input parameter 'n'.
4069 
4070    The DIAGONAL portion of the local submatrix on any given processor
4071    is the submatrix corresponding to the rows m and columns n owned by
4072    the given processor; i.e., the diagonal submatrix on process 0 is
4073    [m0 x n0], on process 1 it is [m1 x n1], etc.
4074    The remaining portion of the local submatrix, of size [m x (N-n)],
4075    constitutes the OFF-DIAGONAL portion. The example below better
4076    illustrates this concept.
4077 
4078    For a square global matrix we define each processor's diagonal portion
4079    to be its local rows and the corresponding columns (a square submatrix);
4080    each processor's off-diagonal portion encompasses the remainder of the
4081    local matrix (a rectangular submatrix).
4082 
4083    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4084 
4085    When calling this routine with a single process communicator, a matrix of
4086    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4087    type of communicator, use the construction mechanism:
4088      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4089 
4090    By default, this format uses inodes (identical nodes) when possible.
4091    We search for consecutive rows with the same nonzero structure, thereby
4092    reusing matrix information to achieve increased efficiency.
4093 
4094    Options Database Keys:
4095 +  -mat_no_inode  - Do not use inodes
4096 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4097 -  -mat_aij_oneindex - Internally use indexing starting at 1
4098         rather than 0.  Note that when calling MatSetValues(),
4099         the user still MUST index entries starting at 0!
4100 
4101 
4102    Example usage:
4103 
4104    Consider the following 8x8 matrix with 34 non-zero values that is
4105    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4106    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4107    as follows:
4108 
4109 .vb
4110             1  2  0  |  0  3  0  |  0  4
4111     Proc0   0  5  6  |  7  0  0  |  8  0
4112             9  0 10  | 11  0  0  | 12  0
4113     -------------------------------------
4114            13  0 14  | 15 16 17  |  0  0
4115     Proc1   0 18  0  | 19 20 21  |  0  0
4116             0  0  0  | 22 23  0  | 24  0
4117     -------------------------------------
4118     Proc2  25 26 27  |  0  0 28  | 29  0
4119            30  0  0  | 31 32 33  |  0 34
4120 .ve
4121 
4122    This can be represented as a collection of submatrices as:
4123 
4124 .vb
4125       A B C
4126       D E F
4127       G H I
4128 .ve
4129 
4130    where the submatrices A,B,C are owned by proc0, D,E,F are
4131    owned by proc1, and G,H,I are owned by proc2.
4132 
4133    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4134    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4135    The 'M','N' parameters are 8,8, and have the same values on all procs.
4136 
4137    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4138    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4139    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4140    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4141    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4142    matrix, and [DF] as another SeqAIJ matrix.
4143 
4144    When d_nz, o_nz parameters are specified, d_nz storage elements are
4145    allocated for every row of the local diagonal submatrix, and o_nz
4146    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4147    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4148    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4149    In this case, the values of d_nz,o_nz are:
4150 .vb
4151      proc0 : d_nz = 2, o_nz = 2
4152      proc1 : d_nz = 3, o_nz = 2
4153      proc2 : d_nz = 1, o_nz = 4
4154 .ve
4155    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4156    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4157    for proc2; i.e., we are using 12+15+10=37 storage locations to store
4158    34 values.
4159 
4160    When d_nnz, o_nnz parameters are specified, the storage is specified
4161    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4162    In the above case the values for d_nnz,o_nnz are:
4163 .vb
4164      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4165      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4166      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4167 .ve
4168    Here the space allocated is the sum of all the above values, i.e. 34, and
4169    hence the preallocation is perfect.
4170 
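   With these arrays, proc0 could create its share of the matrix via the
   following sketch (error checking omitted; the other processes pass their own
   local sizes and nnz arrays):
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* proc0's rows */
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve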
4171    Level: intermediate
4172 
4173 .keywords: matrix, aij, compressed row, sparse, parallel
4174 
4175 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4176           MPIAIJ, MatCreateMPIAIJWithArrays()
4177 @*/
4178 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4179 {
4180   PetscErrorCode ierr;
4181   PetscMPIInt    size;
4182 
4183   PetscFunctionBegin;
4184   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4185   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4186   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4187   if (size > 1) {
4188     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4189     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4190   } else {
4191     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4192     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4193   }
4194   PetscFunctionReturn(0);
4195 }
4196 
4197 #undef __FUNCT__
4198 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4199 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4200 {
4201   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4202 
4203   PetscFunctionBegin;
4204   *Ad     = a->A;
4205   *Ao     = a->B;
4206   *colmap = a->garray;
4207   PetscFunctionReturn(0);
4208 }
4209 
4210 #undef __FUNCT__
4211 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4212 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4213 {
4214   PetscErrorCode ierr;
4215   PetscInt       i;
4216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4217 
4218   PetscFunctionBegin;
4219   if (coloring->ctype == IS_COLORING_GLOBAL) {
4220     ISColoringValue *allcolors,*colors;
4221     ISColoring      ocoloring;
4222 
4223     /* set coloring for diagonal portion */
4224     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4225 
4226     /* set coloring for off-diagonal portion */
4227     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4228     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4229     for (i=0; i<a->B->cmap->n; i++) {
4230       colors[i] = allcolors[a->garray[i]];
4231     }
4232     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4233     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4234     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4235     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4236   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4237     ISColoringValue *colors;
4238     PetscInt        *larray;
4239     ISColoring      ocoloring;
4240 
4241     /* set coloring for diagonal portion */
4242     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4243     for (i=0; i<a->A->cmap->n; i++) {
4244       larray[i] = i + A->cmap->rstart;
4245     }
4246     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4247     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4248     for (i=0; i<a->A->cmap->n; i++) {
4249       colors[i] = coloring->colors[larray[i]];
4250     }
4251     ierr = PetscFree(larray);CHKERRQ(ierr);
4252     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4253     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4254     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4255 
4256     /* set coloring for off-diagonal portion */
4257     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4258     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4259     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4260     for (i=0; i<a->B->cmap->n; i++) {
4261       colors[i] = coloring->colors[larray[i]];
4262     }
4263     ierr = PetscFree(larray);CHKERRQ(ierr);
4264     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4265     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4266     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4267   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4268 
4269   PetscFunctionReturn(0);
4270 }
4271 
4272 #if defined(PETSC_HAVE_ADIC)
4273 #undef __FUNCT__
4274 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4275 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4276 {
4277   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4278   PetscErrorCode ierr;
4279 
4280   PetscFunctionBegin;
4281   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4282   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4283   PetscFunctionReturn(0);
4284 }
4285 #endif
4286 
4287 #undef __FUNCT__
4288 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4289 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4290 {
4291   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4292   PetscErrorCode ierr;
4293 
4294   PetscFunctionBegin;
4295   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4296   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4297   PetscFunctionReturn(0);
4298 }
4299 
4300 #undef __FUNCT__
4301 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4302 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4303 {
4304   PetscErrorCode ierr;
4305   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4306   PetscInt       *indx;
4307 
4308   PetscFunctionBegin;
4309   /* This routine will ONLY return MPIAIJ type matrix */
4310   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4311   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4312   if (n == PETSC_DECIDE){
4313     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4314   }
4315   /* Check sum(n) = N */
4316   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4317   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4318 
4319   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4320   rstart -= m;
4321 
4322   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4323   for (i=0;i<m;i++) {
4324     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4325     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4326     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4327   }
4328 
4329   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4330   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4331   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4332   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4333   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4334   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4335   PetscFunctionReturn(0);
4336 }
4337 
4338 #undef __FUNCT__
4339 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4340 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4341 {
4342   PetscErrorCode ierr;
4343   PetscInt       m,N,i,rstart,nnz,Ii;
4344   PetscInt       *indx;
4345   PetscScalar    *values;
4346 
4347   PetscFunctionBegin;
4348   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4349   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4350   for (i=0;i<m;i++) {
4351     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4352     Ii    = i + rstart;
4353     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4354     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4355   }
4356   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4357   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 #undef __FUNCT__
4362 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4363 /*@
4364       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4365                  matrices from each processor
4366 
4367     Collective on MPI_Comm
4368 
4369    Input Parameters:
4370 +    comm - the communicator the parallel matrix will live on
4371 .    inmat - this process's input sequential matrix
4372 .    n - number of local columns (or PETSC_DECIDE)
4373 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4374 
4375    Output Parameter:
4376 .    outmat - the parallel matrix generated
4377 
4378     Level: advanced
4379 
4380    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4381 
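   Example usage: a minimal sketch (error checking omitted; seqA stands for this
   process's sequential matrix):
.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
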
4382 @*/
4383 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4384 {
4385   PetscErrorCode ierr;
4386 
4387   PetscFunctionBegin;
4388   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4389   if (scall == MAT_INITIAL_MATRIX){
4390     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4391   }
4392   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4393   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4394   PetscFunctionReturn(0);
4395 }
4396 
4397 #undef __FUNCT__
4398 #define __FUNCT__ "MatFileSplit"
4399 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4400 {
4401   PetscErrorCode    ierr;
4402   PetscMPIInt       rank;
4403   PetscInt          m,N,i,rstart,nnz;
4404   size_t            len;
4405   const PetscInt    *indx;
4406   PetscViewer       out;
4407   char              *name;
4408   Mat               B;
4409   const PetscScalar *values;
4410 
4411   PetscFunctionBegin;
4412   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4413   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4414   /* Should this be the type of the diagonal block of A? */
4415   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4416   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4417   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4418   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4419   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4420   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4421   for (i=0;i<m;i++) {
4422     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4423     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4424     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4425   }
4426   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4427   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4428 
4429   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4430   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4431   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4432   sprintf(name,"%s.%d",outfile,rank);
4433   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4434   ierr = PetscFree(name);CHKERRQ(ierr);
4435   ierr = MatView(B,out);CHKERRQ(ierr);
4436   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4437   ierr = MatDestroy(&B);CHKERRQ(ierr);
4438   PetscFunctionReturn(0);
4439 }
4440 
4441 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4442 #undef __FUNCT__
4443 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4444 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4445 {
4446   PetscErrorCode       ierr;
4447   Mat_Merge_SeqsToMPI  *merge;
4448   PetscContainer       container;
4449 
4450   PetscFunctionBegin;
4451   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4452   if (container) {
4453     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4454     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4455     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4456     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4457     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4458     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4459     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4460     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4461     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4462     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4466     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4467     ierr = PetscFree(merge);CHKERRQ(ierr);
4468     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4469   }
4470   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4471   PetscFunctionReturn(0);
4472 }
4473 
4474 #include <../src/mat/utils/freespace.h>
4475 #include <petscbt.h>
4476 
4477 #undef __FUNCT__
4478 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4479 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4480 {
4481   PetscErrorCode       ierr;
4482   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4483   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4484   PetscMPIInt          size,rank,taga,*len_s;
4485   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4486   PetscInt             proc,m;
4487   PetscInt             **buf_ri,**buf_rj;
4488   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4489   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4490   MPI_Request          *s_waits,*r_waits;
4491   MPI_Status           *status;
4492   MatScalar            *aa=a->a;
4493   MatScalar            **abuf_r,*ba_i;
4494   Mat_Merge_SeqsToMPI  *merge;
4495   PetscContainer       container;
4496 
4497   PetscFunctionBegin;
4498   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4499 
4500   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4501   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4502 
4503   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatCreateMPIAIJSumSeqAIJSymbolic()"); /* guard against dereferencing a NULL container */
4504   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4505 
4506   bi     = merge->bi;
4507   bj     = merge->bj;
4508   buf_ri = merge->buf_ri;
4509   buf_rj = merge->buf_rj;
4510 
4511   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4512   owners = merge->rowmap->range;
4513   len_s  = merge->len_s;
4514 
4515   /* send and recv matrix values */
4516   /*-----------------------------*/
4517   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4518   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4519 
4520   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4521   for (proc=0,k=0; proc<size; proc++){
4522     if (!len_s[proc]) continue;
4523     i = owners[proc];
4524     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4525     k++;
4526   }
4527 
4528   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4529   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4530   ierr = PetscFree(status);CHKERRQ(ierr);
4531 
4532   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4533   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4534 
4535   /* insert mat values of mpimat */
4536   /*----------------------------*/
4537   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4538   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4539 
4540   for (k=0; k<merge->nrecv; k++){
4541     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4542     nrows = *(buf_ri_k[k]);
4543     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4544     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4545   }
4546 
4547   /* set values of ba */
4548   m = merge->rowmap->n;
4549   for (i=0; i<m; i++) {
4550     arow = owners[rank] + i;
4551     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4552     bnzi = bi[i+1] - bi[i];
4553     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4554 
4555     /* add local non-zero vals of this proc's seqmat into ba */
4556     anzi = ai[arow+1] - ai[arow];
4557     aj   = a->j + ai[arow];
4558     aa   = a->a + ai[arow];
4559     nextaj = 0;
4560     for (j=0; nextaj<anzi; j++){
4561       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4562         ba_i[j] += aa[nextaj++];
4563       }
4564     }
4565 
4566     /* add received vals into ba */
4567     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4568       /* i-th row */
4569       if (i == *nextrow[k]) {
4570         anzi = *(nextai[k]+1) - *nextai[k];
4571         aj   = buf_rj[k] + *(nextai[k]);
4572         aa   = abuf_r[k] + *(nextai[k]);
4573         nextaj = 0;
4574         for (j=0; nextaj<anzi; j++){
4575           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4576             ba_i[j] += aa[nextaj++];
4577           }
4578         }
4579         nextrow[k]++; nextai[k]++;
4580       }
4581     }
4582     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4583   }
4584   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4585   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4586 
4587   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4588   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4589   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4590   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4591   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4592   PetscFunctionReturn(0);
4593 }
4594 
4595 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4596 
4597 #undef __FUNCT__
4598 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4599 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4600 {
4601   PetscErrorCode       ierr;
4602   Mat                  B_mpi;
4603   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4604   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4605   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4606   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4607   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4608   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4609   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4610   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4611   MPI_Status           *status;
4612   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4613   PetscBT              lnkbt;
4614   Mat_Merge_SeqsToMPI  *merge;
4615   PetscContainer       container;
4616 
4617   PetscFunctionBegin;
4618   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4619 
4620   /* make sure it is a PETSc comm */
4621   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4622   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4623   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4624 
4625   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4626   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4627 
4628   /* determine row ownership */
4629   /*---------------------------------------------------------*/
4630   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4631   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4632   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4633   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4634   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4635   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4636   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4637 
4638   m      = merge->rowmap->n;
4639   M      = merge->rowmap->N;
4640   owners = merge->rowmap->range;
4641 
4642   /* determine the number of messages to send, their lengths */
4643   /*---------------------------------------------------------*/
4644   len_s  = merge->len_s;
4645 
4646   len = 0;  /* length of buf_si[] */
4647   merge->nsend = 0;
4648   for (proc=0; proc<size; proc++){
4649     len_si[proc] = 0;
4650     if (proc == rank){
4651       len_s[proc] = 0;
4652     } else {
4653       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4654       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4655     }
4656     if (len_s[proc]) {
4657       merge->nsend++;
4658       nrows = 0;
4659       for (i=owners[proc]; i<owners[proc+1]; i++){
4660         if (ai[i+1] > ai[i]) nrows++;
4661       }
4662       len_si[proc] = 2*(nrows+1);
4663       len += len_si[proc];
4664     }
4665   }
4666 
4667   /* determine the number and length of messages to receive for ij-structure */
4668   /*-------------------------------------------------------------------------*/
4669   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4670   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4671 
4672   /* post the Irecv of j-structure */
4673   /*-------------------------------*/
4674   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4675   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4676 
4677   /* post the Isend of j-structure */
4678   /*--------------------------------*/
4679   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4680 
4681   for (proc=0, k=0; proc<size; proc++){
4682     if (!len_s[proc]) continue;
4683     i = owners[proc];
4684     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4685     k++;
4686   }
4687 
4688   /* receives and sends of j-structure are complete */
4689   /*------------------------------------------------*/
4690   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4691   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4692 
4693   /* send and recv i-structure */
4694   /*---------------------------*/
4695   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4696   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4697 
4698   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4699   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4700   for (proc=0,k=0; proc<size; proc++){
4701     if (!len_s[proc]) continue;
4702     /* form outgoing message for i-structure:
4703          buf_si[0]:                 nrows to be sent
4704                [1:nrows]:           row index (global)
4705                [nrows+1:2*nrows+1]: i-structure index
4706     */
4707     /*-------------------------------------------*/
4708     nrows = len_si[proc]/2 - 1;
4709     buf_si_i    = buf_si + nrows+1;
4710     buf_si[0]   = nrows;
4711     buf_si_i[0] = 0;
4712     nrows = 0;
4713     for (i=owners[proc]; i<owners[proc+1]; i++){
4714       anzi = ai[i+1] - ai[i];
4715       if (anzi) {
4716         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4717         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4718         nrows++;
4719       }
4720     }
4721     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4722     k++;
4723     buf_si += len_si[proc];
4724   }
4725 
4726   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4727   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4728 
4729   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4730   for (i=0; i<merge->nrecv; i++){
4731     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4732   }
4733 
4734   ierr = PetscFree(len_si);CHKERRQ(ierr);
4735   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4736   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4737   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4738   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4739   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4740   ierr = PetscFree(status);CHKERRQ(ierr);
4741 
4742   /* compute a local seq matrix in each processor */
4743   /*----------------------------------------------*/
4744   /* allocate bi array and free space for accumulating nonzero column info */
4745   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4746   bi[0] = 0;
4747 
4748   /* create and initialize a linked list */
4749   nlnk = N+1;
4750   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4751 
4752   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4754   len  = ai[owners[rank+1]] - ai[owners[rank]];
4755   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4756   current_space = free_space;
4757 
4758   /* determine symbolic info for each local row */
4759   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4760 
4761   for (k=0; k<merge->nrecv; k++){
4762     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4763     nrows = *buf_ri_k[k];
4764     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4765     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4766   }
4767 
4768   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4769   len = 0;
4770   for (i=0;i<m;i++) {
4771     bnzi   = 0;
4772     /* add local non-zero cols of this proc's seqmat into lnk */
4773     arow   = owners[rank] + i;
4774     anzi   = ai[arow+1] - ai[arow];
4775     aj     = a->j + ai[arow];
4776     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4777     bnzi += nlnk;
4778     /* add received col data into lnk */
4779     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4780       if (i == *nextrow[k]) { /* i-th row */
4781         anzi = *(nextai[k]+1) - *nextai[k];
4782         aj   = buf_rj[k] + *nextai[k];
4783         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4784         bnzi += nlnk;
4785         nextrow[k]++; nextai[k]++;
4786       }
4787     }
4788     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4789 
4790     /* if free space is not available, make more free space */
4791     if (current_space->local_remaining<bnzi) {
4792       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4793       nspacedouble++;
4794     }
4795     /* copy data into free space, then initialize lnk */
4796     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4797     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4798 
4799     current_space->array           += bnzi;
4800     current_space->local_used      += bnzi;
4801     current_space->local_remaining -= bnzi;
4802 
4803     bi[i+1] = bi[i] + bnzi;
4804   }
4805 
4806   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4807 
4808   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4809   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4810   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4811 
4812   /* create symbolic parallel matrix B_mpi */
4813   /*---------------------------------------*/
4814   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4815   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4816   if (n==PETSC_DECIDE) {
4817     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4818   } else {
4819     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4820   }
4821   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4822   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4823   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4824   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4825   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4826 
4827   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4828   B_mpi->assembled     = PETSC_FALSE;
4829   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4830   merge->bi            = bi;
4831   merge->bj            = bj;
4832   merge->buf_ri        = buf_ri;
4833   merge->buf_rj        = buf_rj;
4834   merge->coi           = PETSC_NULL;
4835   merge->coj           = PETSC_NULL;
4836   merge->owners_co     = PETSC_NULL;
4837 
4838   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4839 
4840   /* attach the supporting struct to B_mpi for reuse */
4841   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4842   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4843   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4844   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4845   *mpimat = B_mpi;
4846 
4847   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4848   PetscFunctionReturn(0);
4849 }
4850 
4851 #undef __FUNCT__
4852 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4853 /*@C
4854       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4855                  matrices from each processor
4856 
4857     Collective on MPI_Comm
4858 
4859    Input Parameters:
4860 +    comm - the communicator the parallel matrix will live on
4861 .    seqmat - this process's input sequential matrix
4862 .    m - number of local rows (or PETSC_DECIDE)
4863 .    n - number of local columns (or PETSC_DECIDE)
4864 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4865 
4866    Output Parameter:
4867 .    mpimat - the parallel matrix generated
4868 
4869     Level: advanced
4870 
4871    Notes:
4872      The dimensions of the sequential matrix in each processor MUST be the same.
4873      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4874      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
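
   Example usage: a minimal sketch (error checking omitted; seqA stands for this
   process's sequential contribution):
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve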
4875 @*/
4876 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4877 {
4878   PetscErrorCode ierr;
4879   PetscMPIInt    size;
4880 
4881   PetscFunctionBegin;
4882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4883   if (size == 1){
4884      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4885      if (scall == MAT_INITIAL_MATRIX){
4886        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4887      } else {
4888        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4889      }
4890      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4891      PetscFunctionReturn(0);
4892   }
4893   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4894   if (scall == MAT_INITIAL_MATRIX){
4895     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4896   }
4897   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4898   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4899   PetscFunctionReturn(0);
4900 }
4901 
4902 #undef __FUNCT__
4903 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4904 /*@
4905      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4906           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4907           with MatGetSize()
4908 
4909     Not Collective
4910 
4911    Input Parameters:
4912 +    A - the matrix
4913 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4914 
4915    Output Parameter:
4916 .    A_loc - the local sequential matrix generated
4917 
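   Example usage: a minimal sketch (error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
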
4918     Level: developer
4919 
4920 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4921 
4922 @*/
4923 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4924 {
4925   PetscErrorCode  ierr;
4926   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4927   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4928   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4929   MatScalar       *aa=a->a,*ba=b->a,*cam;
4930   PetscScalar     *ca;
4931   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4932   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4933   PetscBool       match;
4934 
4935   PetscFunctionBegin;
4936   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4937   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4938   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4939   if (scall == MAT_INITIAL_MATRIX){
4940     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4941     ci[0] = 0;
4942     for (i=0; i<am; i++){
4943       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4944     }
4945     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4946     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4947     k = 0;
4948     for (i=0; i<am; i++) {
4949       ncols_o = bi[i+1] - bi[i];
4950       ncols_d = ai[i+1] - ai[i];
4951       /* off-diagonal portion of A */
4952       for (jo=0; jo<ncols_o; jo++) {
4953         col = cmap[*bj];
4954         if (col >= cstart) break;
4955         cj[k]   = col; bj++;
4956         ca[k++] = *ba++;
4957       }
4958       /* diagonal portion of A */
4959       for (j=0; j<ncols_d; j++) {
4960         cj[k]   = cstart + *aj++;
4961         ca[k++] = *aa++;
4962       }
4963       /* off-diagonal portion of A */
4964       for (j=jo; j<ncols_o; j++) {
4965         cj[k]   = cmap[*bj++];
4966         ca[k++] = *ba++;
4967       }
4968     }
4969     /* put together the new matrix */
4970     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4971     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4972     /* Since these are PETSc arrays, change flags to free them as necessary. */
4973     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4974     mat->free_a  = PETSC_TRUE;
4975     mat->free_ij = PETSC_TRUE;
4976     mat->nonew   = 0;
4977   } else if (scall == MAT_REUSE_MATRIX){
4978     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4979     ci = mat->i; cj = mat->j; cam = mat->a;
4980     for (i=0; i<am; i++) {
4981       /* off-diagonal portion of A */
4982       ncols_o = bi[i+1] - bi[i];
4983       for (jo=0; jo<ncols_o; jo++) {
4984         col = cmap[*bj];
4985         if (col >= cstart) break;
4986         *cam++ = *ba++; bj++;
4987       }
4988       /* diagonal portion of A */
4989       ncols_d = ai[i+1] - ai[i];
4990       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4991       /* off-diagonal portion of A */
4992       for (j=jo; j<ncols_o; j++) {
4993         *cam++ = *ba++; bj++;
4994       }
4995     }
4996   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4997   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4998   PetscFunctionReturn(0);
4999 }
5000 
5001 #undef __FUNCT__
5002 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5003 /*@C
5004      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5005 
5006     Not Collective
5007 
5008    Input Parameters:
5009 +    A - the matrix
5010 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5011 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5012 
5013    Output Parameter:
5014 .    A_loc - the local sequential matrix generated
5015 
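   Example usage: a minimal sketch (error checking omitted), extracting all
   local rows and all nonzero columns:
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
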
5016     Level: developer
5017 
5018 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5019 
5020 @*/
5021 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5022 {
5023   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5024   PetscErrorCode    ierr;
5025   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5026   IS                isrowa,iscola;
5027   Mat               *aloc;
5028   PetscBool       match;
5029 
5030   PetscFunctionBegin;
5031   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5032   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5033   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5034   if (!row){
5035     start = A->rmap->rstart; end = A->rmap->rend;
5036     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5037   } else {
5038     isrowa = *row;
5039   }
5040   if (!col){
5041     start = A->cmap->rstart;
5042     cmap  = a->garray;
5043     nzA   = a->A->cmap->n;
5044     nzB   = a->B->cmap->n;
5045     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5046     ncols = 0;
5047     for (i=0; i<nzB; i++) {
5048       if (cmap[i] < start) idx[ncols++] = cmap[i];
5049       else break;
5050     }
5051     imark = i;
5052     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5053     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5054     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5055   } else {
5056     iscola = *col;
5057   }
5058   if (scall != MAT_INITIAL_MATRIX){
5059     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5060     aloc[0] = *A_loc;
5061   }
5062   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5063   *A_loc = aloc[0];
5064   ierr = PetscFree(aloc);CHKERRQ(ierr);
5065   if (!row){
5066     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5067   }
5068   if (!col){
5069     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5070   }
5071   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5072   PetscFunctionReturn(0);
5073 }
5074 
5075 #undef __FUNCT__
5076 #define __FUNCT__ "MatGetBrowsOfAcols"
5077 /*@C
5078     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5079 
5080     Collective on Mat
5081 
5082    Input Parameters:
5083 +    A,B - the matrices in mpiaij format
5084 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5085 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5086 
5087    Output Parameter:
5088 +    rowb, colb - index sets of rows and columns of B to extract
5089 -    B_seq - the sequential matrix generated
5090 
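   Example usage: a minimal sketch (error checking omitted); on the first call
   the index sets are created and returned so they can be reused later:
.vb
     Mat B_seq;
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
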
5091     Level: developer
5092 
5093 @*/
5094 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5095 {
5096   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5097   PetscErrorCode    ierr;
5098   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5099   IS                isrowb,iscolb;
5100   Mat               *bseq=PETSC_NULL;
5101 
5102   PetscFunctionBegin;
5103   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5104     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5105   }
5106   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5107 
5108   if (scall == MAT_INITIAL_MATRIX){
5109     start = A->cmap->rstart;
5110     cmap  = a->garray;
5111     nzA   = a->A->cmap->n;
5112     nzB   = a->B->cmap->n;
5113     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5114     ncols = 0;
5115     for (i=0; i<nzB; i++) {  /* row < local row index */
5116       if (cmap[i] < start) idx[ncols++] = cmap[i];
5117       else break;
5118     }
5119     imark = i;
5120     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5121     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5122     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5123     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5124   } else {
5125     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5126     isrowb = *rowb; iscolb = *colb;
5127     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5128     bseq[0] = *B_seq;
5129   }
5130   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5131   *B_seq = bseq[0];
5132   ierr = PetscFree(bseq);CHKERRQ(ierr);
5133   if (!rowb){
5134     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5135   } else {
5136     *rowb = isrowb;
5137   }
5138   if (!colb){
5139     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5140   } else {
5141     *colb = iscolb;
5142   }
5143   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5144   PetscFunctionReturn(0);
5145 }
5146 
5147 #undef __FUNCT__
5148 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5149     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5150     of the OFF-DIAGONAL portion of the local part of A
5151     of the OFF-DIAGONAL portion of local A
5152 
5153     Collective on Mat
5154 
5155    Input Parameters:
5156 +    A,B - the matrices in mpiaij format
5157 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5158 
5159    Output Parameter:
5160 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5161 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5162 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5163 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5164 
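   Example usage: a minimal sketch (error checking omitted); the arrays returned
   by the first call are passed back in for reuse:

     Mat       B_oth;
     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     /* ... */
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
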
5165     Level: developer
5166 
5167 */
5168 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5169 {
5170   VecScatter_MPI_General *gen_to,*gen_from;
5171   PetscErrorCode         ierr;
5172   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5173   Mat_SeqAIJ             *b_oth;
5174   VecScatter             ctx=a->Mvctx;
5175   MPI_Comm               comm=((PetscObject)ctx)->comm;
5176   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5177   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5178   PetscScalar            *rvalues,*svalues;
5179   MatScalar              *b_otha,*bufa,*bufA;
5180   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5181   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5182   MPI_Status             *sstatus,rstatus;
5183   PetscMPIInt            jj;
5184   PetscInt               *cols,sbs,rbs;
5185   PetscScalar            *vals;
5186 
5187   PetscFunctionBegin;
5188   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5189     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5190   }
5191   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5192   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5193 
5194   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5195   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5196   rvalues  = gen_from->values; /* holds the length of receiving row */
5197   svalues  = gen_to->values;   /* holds the length of sending row */
5198   nrecvs   = gen_from->n;
5199   nsends   = gen_to->n;
5200 
5201   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5202   srow     = gen_to->indices;   /* local row index to be sent */
5203   sstarts  = gen_to->starts;
5204   sprocs   = gen_to->procs;
5205   sstatus  = gen_to->sstatus;
5206   sbs      = gen_to->bs;
5207   rstarts  = gen_from->starts;
5208   rprocs   = gen_from->procs;
5209   rbs      = gen_from->bs;
5210 
5211   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5212   if (scall == MAT_INITIAL_MATRIX){
5213     /* i-array */
5214     /*---------*/
5215     /*  post receives */
5216     for (i=0; i<nrecvs; i++){
5217       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5218       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5219       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5220     }
5221 
5222     /* pack the outgoing message */
5223     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5224     sstartsj[0] = 0;  rstartsj[0] = 0;
5225     len = 0; /* total length of j or a array to be sent */
5226     k = 0;
5227     for (i=0; i<nsends; i++){
5228       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5229       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5230       for (j=0; j<nrows; j++) {
5231         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5232         for (l=0; l<sbs; l++){
5233           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5234           rowlen[j*sbs+l] = ncols;
5235           len += ncols;
5236           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5237         }
5238         k++;
5239       }
5240       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5241       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5242     }
5243     /* recvs and sends of i-array are completed */
5244     i = nrecvs;
5245     while (i--) {
5246       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5247     }
5248     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5249 
5250     /* allocate buffers for sending j and a arrays */
5251     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5252     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5253 
5254     /* create i-array of B_oth */
5255     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5256     b_othi[0] = 0;
5257     len = 0; /* total length of j or a array to be received */
5258     k = 0;
5259     for (i=0; i<nrecvs; i++){
5260       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5261       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5262       for (j=0; j<nrows; j++) {
5263         b_othi[k+1] = b_othi[k] + rowlen[j];
5264         len += rowlen[j]; k++;
5265       }
5266       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5267     }
5268 
5269     /* allocate space for j and a arrays of B_oth */
5270     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5271     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5272 
5273     /* j-array */
5274     /*---------*/
5275     /*  post receives of j-array */
5276     for (i=0; i<nrecvs; i++){
5277       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5278       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5279     }
5280 
5281     /* pack the outgoing message j-array */
5282     k = 0;
5283     for (i=0; i<nsends; i++){
5284       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5285       bufJ = bufj+sstartsj[i];
5286       for (j=0; j<nrows; j++) {
5287         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5288         for (ll=0; ll<sbs; ll++){
5289           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5290           for (l=0; l<ncols; l++){
5291             *bufJ++ = cols[l];
5292           }
5293           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5294         }
5295       }
5296       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5297     }
5298 
5299     /* recvs and sends of j-array are completed */
5300     i = nrecvs;
5301     while (i--) {
5302       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5303     }
5304     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5305   } else if (scall == MAT_REUSE_MATRIX){
5306     sstartsj = *startsj_s;
5307     rstartsj = *startsj_r;
5308     bufa     = *bufa_ptr;
5309     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5310     b_otha   = b_oth->a;
5311   } else {
5312     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5313   }
5314 
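  /* The a-array is exchanged on every call: with MAT_REUSE_MATRIX only the numerical
     values change, while the i- and j-structure obtained on the initial call is
     reused. */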
5315   /* a-array */
5316   /*---------*/
5317   /*  post receives of a-array */
5318   for (i=0; i<nrecvs; i++){
5319     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5320     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5321   }
5322 
5323   /* pack the outgoing message a-array */
5324   k = 0;
5325   for (i=0; i<nsends; i++){
5326     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5327     bufA = bufa+sstartsj[i];
5328     for (j=0; j<nrows; j++) {
5329       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5330       for (ll=0; ll<sbs; ll++){
5331         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5332         for (l=0; l<ncols; l++){
5333           *bufA++ = vals[l];
5334         }
5335         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5336       }
5337     }
5338     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5339   }
5340   /* recvs and sends of a-array are completed */
5341   i = nrecvs;
5342   while (i--) {
5343     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5344   }
5345   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5346   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5347 
5348   if (scall == MAT_INITIAL_MATRIX){
5349     /* put together the new matrix */
5350     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5351 
5352     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5353     /* Since these are PETSc arrays, change flags to free them as necessary. */
5354     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5355     b_oth->free_a  = PETSC_TRUE;
5356     b_oth->free_ij = PETSC_TRUE;
5357     b_oth->nonew   = 0;
5358 
5359     ierr = PetscFree(bufj);CHKERRQ(ierr);
5360     if (!startsj_s || !bufa_ptr){
5361       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5362       ierr = PetscFree(bufa);CHKERRQ(ierr);
5363     } else {
5364       *startsj_s = sstartsj;
5365       *startsj_r = rstartsj;
5366       *bufa_ptr  = bufa;
5367     }
5368   }
5369   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5370   PetscFunctionReturn(0);
5371 }
5372 
5373 #undef __FUNCT__
5374 #define __FUNCT__ "MatGetCommunicationStructs"
5375 /*@C
5376   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5377 
5378   Not Collective
5379 
5380   Input Parameter:
5381 . A - The matrix in mpiaij format
5382 
5383   Output Parameters:
5384 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5385 . colmap - A map from global column index to local index into lvec
5386 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5387 
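  Sample usage (an illustrative sketch; assumes A is an assembled MATMPIAIJ matrix
  and that the caller treats the returned structures as read-only):
.vb
    Vec            lvec;
    VecScatter     Mvctx;
    PetscErrorCode ierr;
#if defined(PETSC_USE_CTABLE)
    PetscTable     colmap;
#else
    PetscInt       *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
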
5388   Level: developer
5389 
5390 @*/
5391 #if defined (PETSC_USE_CTABLE)
5392 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5393 #else
5394 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5395 #endif
5396 {
5397   Mat_MPIAIJ *a;
5398 
5399   PetscFunctionBegin;
5400   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5401   PetscValidPointer(lvec, 2);
5402   PetscValidPointer(colmap, 3);
5403   PetscValidPointer(multScatter, 4);
5404   a = (Mat_MPIAIJ *) A->data;
5405   if (lvec) *lvec = a->lvec;
5406   if (colmap) *colmap = a->colmap;
5407   if (multScatter) *multScatter = a->Mvctx;
5408   PetscFunctionReturn(0);
5409 }
5410 
5411 EXTERN_C_BEGIN
5412 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5413 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5414 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5415 EXTERN_C_END
5416 
5417 #undef __FUNCT__
5418 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5419 /*
5420     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5421 
5422                n                       p                          p
5423         (              )       (              )         (                  )
5424       m (      A       )  *  n (       B      )   =   m (         C        )
5425         (              )       (              )         (                  )
5426 
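    Note: this forms explicit transposes At and Bt, multiplies them with the
    AIJ*Dense kernel, and transposes the result back, so each numeric product
    performs three MatTranspose() operations.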
5427 */
5428 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5429 {
5430   PetscErrorCode     ierr;
5431   Mat                At,Bt,Ct;
5432 
5433   PetscFunctionBegin;
5434   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5435   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5436   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5437   ierr = MatDestroy(&At);CHKERRQ(ierr);
5438   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5439   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5440   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5441   PetscFunctionReturn(0);
5442 }
5443 
5444 #undef __FUNCT__
5445 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5446 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5447 {
5448   PetscErrorCode ierr;
5449   PetscInt       m=A->rmap->n,n=B->cmap->n;
5450   Mat            Cmat;
5451 
5452   PetscFunctionBegin;
5453   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5454   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5455   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5456   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5457   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5458   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5459   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5460   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5461   *C   = Cmat;
5462   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5463   PetscFunctionReturn(0);
5464 }
5465 
5466 /* ----------------------------------------------------------------*/
5467 #undef __FUNCT__
5468 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5469 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5470 {
5471   PetscErrorCode ierr;
5472 
5473   PetscFunctionBegin;
5474   if (scall == MAT_INITIAL_MATRIX){
5475     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5476   }
5477   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5478   PetscFunctionReturn(0);
5479 }
5480 
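/* Usage sketch (illustrative only): the generic MatMatMult() reaches the routine
   above through the "MatMatMult_mpidense_mpiaij_C" function composed in
   MatCreate_MPIAIJ() below,

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   with A of type MATMPIDENSE and B of type MATMPIAIJ. */
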
5481 EXTERN_C_BEGIN
5482 #if defined(PETSC_HAVE_MUMPS)
5483 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5484 #endif
5485 #if defined(PETSC_HAVE_PASTIX)
5486 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5487 #endif
5488 #if defined(PETSC_HAVE_SUPERLU_DIST)
5489 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5490 #endif
5491 #if defined(PETSC_HAVE_SPOOLES)
5492 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5493 #endif
5494 EXTERN_C_END
5495 
5496 /*MC
5497    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5498 
5499    Options Database Keys:
5500 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5501 
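  Example (a minimal sketch; the global sizes M and N are assumed to be set by the
  caller, and the preallocation numbers are illustrative):
.vb
    Mat            A;
    PetscErrorCode ierr;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
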
5502   Level: beginner
5503 
5504 .seealso: MatCreateAIJ()
5505 M*/
5506 
5507 EXTERN_C_BEGIN
5508 #undef __FUNCT__
5509 #define __FUNCT__ "MatCreate_MPIAIJ"
5510 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5511 {
5512   Mat_MPIAIJ     *b;
5513   PetscErrorCode ierr;
5514   PetscMPIInt    size;
5515 
5516   PetscFunctionBegin;
5517   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5518 
5519   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5520   B->data         = (void*)b;
5521   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5522   B->assembled    = PETSC_FALSE;
5523 
5524   B->insertmode   = NOT_SET_VALUES;
5525   b->size         = size;
5526   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5527 
5528   /* build cache for off array entries formed */
5529   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5530   b->donotstash  = PETSC_FALSE;
5531   b->colmap      = 0;
5532   b->garray      = 0;
5533   b->roworiented = PETSC_TRUE;
5534 
5535   /* stuff used for matrix vector multiply */
5536   b->lvec      = PETSC_NULL;
5537   b->Mvctx     = PETSC_NULL;
5538 
5539   /* stuff for MatGetRow() */
5540   b->rowindices   = 0;
5541   b->rowvalues    = 0;
5542   b->getrowactive = PETSC_FALSE;
5543 
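  /* Register type-specific implementations by composing named function pointers on
     the object; generic interfaces such as MatGetFactor(), MatConvert(), and
     MatMatMult() look these names up at run time. */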
5544 #if defined(PETSC_HAVE_SPOOLES)
5545   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5546                                      "MatGetFactor_mpiaij_spooles",
5547                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5548 #endif
5549 #if defined(PETSC_HAVE_MUMPS)
5550   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5551                                      "MatGetFactor_aij_mumps",
5552                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5553 #endif
5554 #if defined(PETSC_HAVE_PASTIX)
5555   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5556                                      "MatGetFactor_mpiaij_pastix",
5557                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5558 #endif
5559 #if defined(PETSC_HAVE_SUPERLU_DIST)
5560   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5561                                      "MatGetFactor_mpiaij_superlu_dist",
5562                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5563 #endif
5564   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5565                                      "MatStoreValues_MPIAIJ",
5566                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5567   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5568                                      "MatRetrieveValues_MPIAIJ",
5569                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5570   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5571                                      "MatGetDiagonalBlock_MPIAIJ",
5572                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5573   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5574                                      "MatIsTranspose_MPIAIJ",
5575                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5577                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5578                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5579   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5580                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5581                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5582   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5583                                      "MatDiagonalScaleLocal_MPIAIJ",
5584                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5586                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5587                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5589                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5590                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5592                                      "MatConvert_MPIAIJ_MPISBAIJ",
5593                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5595                                      "MatMatMult_MPIDense_MPIAIJ",
5596                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5598                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5599                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5601                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5602                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5604   PetscFunctionReturn(0);
5605 }
5606 EXTERN_C_END
5607 
5608 #undef __FUNCT__
5609 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5610 /*@
5611      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5612          and "off-diagonal" part of the matrix in CSR format.
5613 
5614    Collective on MPI_Comm
5615 
5616    Input Parameters:
5617 +  comm - MPI communicator
5618 .  m - number of local rows (Cannot be PETSC_DECIDE)
5619 .  n - This value should be the same as the local size used in creating the
5620        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5621        calculated if N is given). For square matrices n is almost always m.
5622 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5623 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5624 .   i - row indices for "diagonal" portion of matrix
5625 .   j - column indices for "diagonal" portion of matrix
5626 .   a - matrix values
5627 .   oi - row indices for "off-diagonal" portion of matrix
5628 .   oj - column indices for "off-diagonal" portion of matrix
5629 -   oa - matrix values
5630 
5631    Output Parameter:
5632 .   mat - the matrix
5633 
5634    Level: advanced
5635 
5636    Notes:
5637        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5638        must free the arrays once the matrix has been destroyed and not before.
5639 
5640        The i and j indices are 0-based
5641 
5642        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5643 
5644        This sets local rows and cannot be used to set off-processor values.
5645 
5646        You cannot later use MatSetValues() to change values in this matrix.
5647 
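   Example for a process owning two rows (an illustrative sketch; gcol is a
   placeholder for a global column index owned by another process, and the j
   indices of the "diagonal" block are local to that block):
.vb
    PetscInt    i[]  = {0,1,2},  j[]  = {0,1};    /* one diagonal entry per row */
    PetscScalar a[]  = {1.0,2.0};
    PetscInt    oi[] = {0,1,1},  oj[] = {gcol};   /* one off-diagonal entry in row 0 */
    PetscScalar oa[] = {-1.0};
    ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                          i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
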
5648 .keywords: matrix, aij, compressed row, sparse, parallel
5649 
5650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5651           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5652 @*/
5653 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5654 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5655 {
5656   PetscErrorCode ierr;
5657   Mat_MPIAIJ     *maij;
5658 
5659   PetscFunctionBegin;
5660   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5661   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5662   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5663   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5664   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5665   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5666   maij = (Mat_MPIAIJ*) (*mat)->data;
5667   maij->donotstash     = PETSC_TRUE;
5668   (*mat)->preallocated = PETSC_TRUE;
5669 
5670   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5671   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5672 
5673   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5674   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5675 
5676   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5677   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5678   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5679   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5680 
5681   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5682   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5683   PetscFunctionReturn(0);
5684 }
5685 
5686 /*
5687     Special version for direct calls from Fortran
5688 */
5689 #include <petsc-private/fortranimpl.h>
5690 
5691 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5692 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5693 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5694 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5695 #endif
5696 
5697 /* Change these macros so they can be used in a void function */
5698 #undef CHKERRQ
5699 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5700 #undef SETERRQ2
5701 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5702 #undef SETERRQ3
5703 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5704 #undef SETERRQ
5705 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5706 
5707 EXTERN_C_BEGIN
5708 #undef __FUNCT__
5709 #define __FUNCT__ "matsetvaluesmpiaij_"
5710 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5711 {
5712   Mat             mat = *mmat;
5713   PetscInt        m = *mm, n = *mn;
5714   InsertMode      addv = *maddv;
5715   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5716   PetscScalar     value;
5717   PetscErrorCode  ierr;
5718 
5719   MatCheckPreallocated(mat,1);
5720   if (mat->insertmode == NOT_SET_VALUES) {
5721     mat->insertmode = addv;
5722   }
5723 #if defined(PETSC_USE_DEBUG)
5724   else if (mat->insertmode != addv) {
5725     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5726   }
5727 #endif
5728   {
5729   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5730   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5731   PetscBool       roworiented = aij->roworiented;
5732 
5733   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5734   Mat             A = aij->A;
5735   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5736   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5737   MatScalar       *aa = a->a;
5738   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5739   Mat             B = aij->B;
5740   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5741   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5742   MatScalar       *ba = b->a;
5743 
5744   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5745   PetscInt        nonew = a->nonew;
5746   MatScalar       *ap1,*ap2;
5747 
5748   PetscFunctionBegin;
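  /* For each (im[i],in[j]) pair: entries in locally owned rows are inserted directly,
     into the diagonal block A when the column is locally owned and into the
     off-diagonal block B otherwise; entries in rows owned by other processes are
     stashed and exchanged during MatAssemblyBegin/End(). */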
5749   for (i=0; i<m; i++) {
5750     if (im[i] < 0) continue;
5751 #if defined(PETSC_USE_DEBUG)
5752     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5753 #endif
5754     if (im[i] >= rstart && im[i] < rend) {
5755       row      = im[i] - rstart;
5756       lastcol1 = -1;
5757       rp1      = aj + ai[row];
5758       ap1      = aa + ai[row];
5759       rmax1    = aimax[row];
5760       nrow1    = ailen[row];
5761       low1     = 0;
5762       high1    = nrow1;
5763       lastcol2 = -1;
5764       rp2      = bj + bi[row];
5765       ap2      = ba + bi[row];
5766       rmax2    = bimax[row];
5767       nrow2    = bilen[row];
5768       low2     = 0;
5769       high2    = nrow2;
5770 
5771       for (j=0; j<n; j++) {
5772         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5773         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5774         if (in[j] >= cstart && in[j] < cend){
5775           col = in[j] - cstart;
5776           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5777         } else if (in[j] < 0) continue;
5778 #if defined(PETSC_USE_DEBUG)
5779         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5780 #endif
5781         else {
5782           if (mat->was_assembled) {
5783             if (!aij->colmap) {
5784               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5785             }
5786 #if defined (PETSC_USE_CTABLE)
5787             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5788             col--;
5789 #else
5790             col = aij->colmap[in[j]] - 1;
5791 #endif
5792             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5793               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5794               col =  in[j];
5795               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5796               B = aij->B;
5797               b = (Mat_SeqAIJ*)B->data;
5798               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5799               rp2      = bj + bi[row];
5800               ap2      = ba + bi[row];
5801               rmax2    = bimax[row];
5802               nrow2    = bilen[row];
5803               low2     = 0;
5804               high2    = nrow2;
5805               bm       = aij->B->rmap->n;
5806               ba = b->a;
5807             }
5808           } else col = in[j];
5809           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5810         }
5811       }
5812     } else {
5813       if (!aij->donotstash) {
5814         if (roworiented) {
5815           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5816         } else {
5817           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5818         }
5819       }
5820     }
5821   }}
5822   PetscFunctionReturnVoid();
5823 }
5824 EXTERN_C_END
5825 
5826