1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to
18    using inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
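/*
   Usage sketch (illustrative only; `comm', `m', `n', `d_nnz', and `o_nnz' are
   placeholders the caller supplies): create an AIJ matrix and, as recommended
   above, call both preallocation routines so the same code runs unchanged on
   one process or many.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ... MatSetValues() calls, then MatAssemblyBegin()/MatAssemblyEnd() ...
*/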
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
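/*
   Caller sketch (illustrative; assumes the generic wrapper MatFindNonzeroRows()
   dispatches to the routine above for MATMPIAIJ): obtain the index set of rows
   holding at least one stored nonzero value, then free it.  keptrows comes
   back as 0 (NULL) when no process has a zero row.

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/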
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
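/*
   Usage sketch (illustrative): compute the 2-norm of every global column; each
   process receives the full length-N result because the routine above finishes
   with an MPI_Allreduce() over all columns.

     PetscReal *norms;
     PetscInt   N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ... use norms[0..N-1] ...
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/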
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0 */
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
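/*
   Usage sketch (illustrative; this is an internal routine, so how PETSc drives
   it is an assumption here): rank 0 holds the full square SeqAIJ matrix gmat
   and every process requests m local rows.  The first call builds the
   distributed matrix; a reuse call only moves the numerical values from rank 0.

     Mat dist = PETSC_NULL;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... rank 0 changes the entries of gmat (same nonzero pattern) ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/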
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it it is not scalable (each process
330 has an order-N integer array) but it is fast to access.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
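/*
   Worked example of the colmap convention (numbers made up for illustration):
   if the off-diagonal block B of this process touches global columns
   garray = {3, 57, 300}, then colmap maps 3->1, 57->2, 300->3, i.e. local
   index plus one, so that 0 can mean "column not present".  A lookup therefore
   subtracts one, exactly as MatSetValues_MPIAIJ() does below (`gcol' is a
   placeholder for a global column index):

     PetscInt col;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
     if (col < 0) { ... gcol does not occur in B on this process ... }
*/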
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
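/*
   Both macros above implement the same technique: each row is kept sorted by
   column index, a binary search narrows the window to at most five entries, a
   linear scan finds the column, and a missing column is inserted by shifting
   the later entries up one slot.  A freestanding sketch of that technique
   (simplified: it assumes the arrays have room for one more entry and omits
   the ignorezeroentries/nonew handling and the reallocation the macros do via
   MatSeqXAIJReallocateAIJ):

     static void RowInsertOrAdd(PetscInt *cols,MatScalar *vals,PetscInt *nrow,
                                PetscInt col,MatScalar v,InsertMode addv)
     {
       PetscInt low = 0,high = *nrow,t,i;
       while (high - low > 5) {
         t = (low + high)/2;
         if (cols[t] > col) high = t;
         else               low  = t;
       }
       for (i=low; i<high; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += v;
           else                    vals[i]  = v;
           return;
         }
       }
       for (t=*nrow-1; t>=i; t--) { cols[t+1] = cols[t]; vals[t+1] = vals[t]; }
       cols[i] = col; vals[i] = v; (*nrow)++;
     }
*/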
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         mat->assembled = PETSC_FALSE;
545         if (roworiented) {
546           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
547         } else {
548           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
549         }
550       }
551     }
552   }
553   PetscFunctionReturn(0);
554 }
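/*
   Usage sketch (illustrative; `row', `ncols', `cols', and `vals' are
   placeholders): MatSetValues() may be called with rows owned by another
   process; such entries are stashed by the code above and communicated during
   assembly, which is why the assembly pair is mandatory.

     ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/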
555 
556 #undef __FUNCT__
557 #define __FUNCT__ "MatGetValues_MPIAIJ"
558 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
559 {
560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
561   PetscErrorCode ierr;
562   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
563   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
564 
565   PetscFunctionBegin;
566   for (i=0; i<m; i++) {
567     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
568     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
569     if (idxm[i] >= rstart && idxm[i] < rend) {
570       row = idxm[i] - rstart;
571       for (j=0; j<n; j++) {
572         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
573         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
574         if (idxn[j] >= cstart && idxn[j] < cend){
575           col = idxn[j] - cstart;
576           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
577         } else {
578           if (!aij->colmap) {
579             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
580           }
581 #if defined (PETSC_USE_CTABLE)
582           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
583           col --;
584 #else
585           col = aij->colmap[idxn[j]] - 1;
586 #endif
587           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
588           else {
589             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
590           }
591         }
592       }
593     } else {
594       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
595     }
596   }
597   PetscFunctionReturn(0);
598 }
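/*
   Usage sketch (illustrative; `row' and `col' are placeholders): unlike
   MatSetValues(), this routine supports only locally owned rows, so a caller
   first restricts its queries with MatGetOwnershipRange():

     PetscInt    rstart,rend;
     PetscScalar value;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     if (row >= rstart && row < rend) {
       ierr = MatGetValues(mat,1,&row,1,&col,&value);CHKERRQ(ierr);
     }
*/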
599 
600 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
601 
602 #undef __FUNCT__
603 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
604 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
605 {
606   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
607   PetscErrorCode ierr;
608   PetscInt       nstash,reallocs;
609   InsertMode     addv;
610 
611   PetscFunctionBegin;
612   if (aij->donotstash || mat->nooffprocentries) {
613     PetscFunctionReturn(0);
614   }
615 
616   /* make sure all processors are either in INSERTMODE or ADDMODE */
617   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
618   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added values; cannot mix INSERT_VALUES and ADD_VALUES");
619   mat->insertmode = addv; /* in case this processor had no cache */
620 
621   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
622   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
623   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNCT__
628 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
629 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
633   PetscErrorCode ierr;
634   PetscMPIInt    n;
635   PetscInt       i,j,rstart,ncols,flg;
636   PetscInt       *row,*col;
637   PetscBool      other_disassembled;
638   PetscScalar    *val;
639   InsertMode     addv = mat->insertmode;
640 
641   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
642   PetscFunctionBegin;
643   if (!aij->donotstash && !mat->nooffprocentries) {
644     while (1) {
645       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
646       if (!flg) break;
647 
648       for (i=0; i<n;) {
649         /* Now identify the consecutive vals belonging to the same row */
650         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
651         if (j < n) ncols = j-i;
652         else       ncols = n-i;
653         /* Now assemble all these values with a single function call */
654         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
655         i = j;
656       }
657     }
658     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
659   }
660   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
661   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
662 
663   /* determine if any processor has disassembled, if so we must
664      also disassemble ourselves, in order that we may reassemble. */
665   /*
666      if nonzero structure of submatrix B cannot change then we know that
667      no processor disassembled thus we can skip this stuff
668   */
669   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
670     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
671     if (mat->was_assembled && !other_disassembled) {
672       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
673     }
674   }
675   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
676     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
677   }
678   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
680   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
681   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
682 
683   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
684   aij->rowvalues = 0;
685 
686   /* used by MatAXPY() */
687   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
688   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
689 
690   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
691   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
697 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
698 {
699   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
700   PetscErrorCode ierr;
701 
702   PetscFunctionBegin;
703   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
704   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatZeroRows_MPIAIJ"
710 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
711 {
712   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
713   PetscErrorCode    ierr;
714   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
715   PetscInt          i,*owners = A->rmap->range;
716   PetscInt          *nprocs,j,idx,nsends,row;
717   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
718   PetscInt          *rvalues,count,base,slen,*source;
719   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
720   MPI_Comm          comm = ((PetscObject)A)->comm;
721   MPI_Request       *send_waits,*recv_waits;
722   MPI_Status        recv_status,*send_status;
723   const PetscScalar *xx;
724   PetscScalar       *bb;
725 #if defined(PETSC_USE_DEBUG)
726   PetscBool      found = PETSC_FALSE;
727 #endif
728 
729   PetscFunctionBegin;
730   /*  first count number of contributors to each processor */
731   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
732   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
733   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
734   j = 0;
735   for (i=0; i<N; i++) {
736     if (lastidx > (idx = rows[i])) j = 0;
737     lastidx = idx;
738     for (; j<size; j++) {
739       if (idx >= owners[j] && idx < owners[j+1]) {
740         nprocs[2*j]++;
741         nprocs[2*j+1] = 1;
742         owner[i] = j;
743 #if defined(PETSC_USE_DEBUG)
744         found = PETSC_TRUE;
745 #endif
746         break;
747       }
748     }
749 #if defined(PETSC_USE_DEBUG)
750     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
751     found = PETSC_FALSE;
752 #endif
753   }
754   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
755 
756   if (A->nooffproczerorows) {
757     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
758     nrecvs = nsends;
759     nmax   = N;
760   } else {
761     /* inform other processors of number of messages and max length*/
762     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
763   }
764 
765   /* post receives:   */
766   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
767   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
768   for (i=0; i<nrecvs; i++) {
769     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
770   }
771 
772   /* do sends:
773       1) starts[i] gives the starting index in svalues for stuff going to
774          the ith processor
775   */
776   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
777   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
778   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
779   starts[0] = 0;
780   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
781   for (i=0; i<N; i++) {
782     svalues[starts[owner[i]]++] = rows[i];
783   }
784 
785   starts[0] = 0;
786   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
787   count = 0;
788   for (i=0; i<size; i++) {
789     if (nprocs[2*i+1]) {
790       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
791     }
792   }
793   ierr = PetscFree(starts);CHKERRQ(ierr);
794 
795   base = owners[rank];
796 
797   /*  wait on receives */
798   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
799   count  = nrecvs; slen = 0;
800   while (count) {
801     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
802     /* unpack receives into our local space */
803     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
804     source[imdex]  = recv_status.MPI_SOURCE;
805     lens[imdex]    = n;
806     slen          += n;
807     count--;
808   }
809   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
810 
811   /* move the data into the send scatter */
812   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
813   count = 0;
814   for (i=0; i<nrecvs; i++) {
815     values = rvalues + i*nmax;
816     for (j=0; j<lens[i]; j++) {
817       lrows[count++] = values[j] - base;
818     }
819   }
820   ierr = PetscFree(rvalues);CHKERRQ(ierr);
821   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
822   ierr = PetscFree(owner);CHKERRQ(ierr);
823   ierr = PetscFree(nprocs);CHKERRQ(ierr);
824 
825   /* fix right hand side if needed */
826   if (x && b) {
827     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
828     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
829     for (i=0; i<slen; i++) {
830       bb[lrows[i]] = diag*xx[lrows[i]];
831     }
832     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
833     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
834   }
835   /*
836         Zero the required rows. If the "diagonal block" of the matrix
837      is square and the user wishes to set the diagonal we use separate
838      code so that MatSetValues() is not called for each diagonal entry,
839      which would allocate new memory with lots of mallocs, slowing things
840      down.
841   */
842   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
843   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
844   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
845     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
846   } else if (diag != 0.0) {
847     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
849       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
850 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
851     }
852     for (i = 0; i < slen; i++) {
853       row  = lrows[i] + rstart;
854       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
855     }
856     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
858   } else {
859     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(lrows);CHKERRQ(ierr);
862 
863   /* wait on sends */
864   if (nsends) {
865     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
866     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
867     ierr = PetscFree(send_status);CHKERRQ(ierr);
868   }
869   ierr = PetscFree(send_waits);CHKERRQ(ierr);
870   ierr = PetscFree(svalues);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
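/*
   Usage sketch (illustrative; `nrows' and `rows' are placeholders, and rows
   may include rows owned by other processes): zero the rows, put `diag' on the
   diagonal, and, when x and b are supplied, set b in those rows to diag*x so a
   subsequent solve keeps the values in x.

     ierr = MatZeroRows(A,nrows,rows,diag,x,b);CHKERRQ(ierr);

   Passing PETSC_NULL for x and b skips the right-hand-side fix, as the calls
   above do internally with 0,0.
*/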
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
876 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
877 {
878   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
879   PetscErrorCode    ierr;
880   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
881   PetscInt          i,*owners = A->rmap->range;
882   PetscInt          *nprocs,j,idx,nsends;
883   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
884   PetscInt          *rvalues,count,base,slen,*source;
885   PetscInt          *lens,*lrows,*values,m;
886   MPI_Comm          comm = ((PetscObject)A)->comm;
887   MPI_Request       *send_waits,*recv_waits;
888   MPI_Status        recv_status,*send_status;
889   const PetscScalar *xx;
890   PetscScalar       *bb,*mask;
891   Vec               xmask,lmask;
892   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
893   const PetscInt    *aj, *ii,*ridx;
894   PetscScalar       *aa;
895 #if defined(PETSC_USE_DEBUG)
896   PetscBool         found = PETSC_FALSE;
897 #endif
898 
899   PetscFunctionBegin;
900   /*  first count number of contributors to each processor */
901   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
902   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
903   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
904   j = 0;
905   for (i=0; i<N; i++) {
906     if (lastidx > (idx = rows[i])) j = 0;
907     lastidx = idx;
908     for (; j<size; j++) {
909       if (idx >= owners[j] && idx < owners[j+1]) {
910         nprocs[2*j]++;
911         nprocs[2*j+1] = 1;
912         owner[i] = j;
913 #if defined(PETSC_USE_DEBUG)
914         found = PETSC_TRUE;
915 #endif
916         break;
917       }
918     }
919 #if defined(PETSC_USE_DEBUG)
920     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
921     found = PETSC_FALSE;
922 #endif
923   }
924   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
925 
926   /* inform other processors of number of messages and max length*/
927   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
928 
929   /* post receives:   */
930   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
931   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
932   for (i=0; i<nrecvs; i++) {
933     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
934   }
935 
936   /* do sends:
937       1) starts[i] gives the starting index in svalues for stuff going to
938          the ith processor
939   */
940   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
941   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
942   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
943   starts[0] = 0;
944   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
945   for (i=0; i<N; i++) {
946     svalues[starts[owner[i]]++] = rows[i];
947   }
948 
949   starts[0] = 0;
950   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
951   count = 0;
952   for (i=0; i<size; i++) {
953     if (nprocs[2*i+1]) {
954       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
955     }
956   }
957   ierr = PetscFree(starts);CHKERRQ(ierr);
958 
959   base = owners[rank];
960 
961   /*  wait on receives */
962   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
963   count  = nrecvs; slen = 0;
964   while (count) {
965     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
966     /* unpack receives into our local space */
967     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
968     source[imdex]  = recv_status.MPI_SOURCE;
969     lens[imdex]    = n;
970     slen          += n;
971     count--;
972   }
973   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
974 
975   /* move the data into the send scatter */
976   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
977   count = 0;
978   for (i=0; i<nrecvs; i++) {
979     values = rvalues + i*nmax;
980     for (j=0; j<lens[i]; j++) {
981       lrows[count++] = values[j] - base;
982     }
983   }
984   ierr = PetscFree(rvalues);CHKERRQ(ierr);
985   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
986   ierr = PetscFree(owner);CHKERRQ(ierr);
987   ierr = PetscFree(nprocs);CHKERRQ(ierr);
988   /* lrows are the local rows to be zeroed, slen is the number of local rows */
989 
990   /* zero diagonal part of matrix */
991   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
992 
993   /* handle off diagonal part of matrix */
994   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
995   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
996   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
997   for (i=0; i<slen; i++) {
998     bb[lrows[i]] = 1;
999   }
1000   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1001   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1004   if (x) {
1005     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1008     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1009   }
1010   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1011 
1012   /* remove zeroed rows of off diagonal matrix */
1013   ii = aij->i;
1014   for (i=0; i<slen; i++) {
1015     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1016   }
1017 
1018   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1019   if (aij->compressedrow.use){
1020     m    = aij->compressedrow.nrows;
1021     ii   = aij->compressedrow.i;
1022     ridx = aij->compressedrow.rindex;
1023     for (i=0; i<m; i++){
1024       n   = ii[i+1] - ii[i];
1025       aj  = aij->j + ii[i];
1026       aa  = aij->a + ii[i];
1027 
1028       for (j=0; j<n; j++) {
1029         if (PetscAbsScalar(mask[*aj])) {
1030           if (b) bb[*ridx] -= *aa*xx[*aj];
1031           *aa        = 0.0;
1032         }
1033         aa++;
1034         aj++;
1035       }
1036       ridx++;
1037     }
1038   } else { /* do not use compressed row format */
1039     m = l->B->rmap->n;
1040     for (i=0; i<m; i++) {
1041       n   = ii[i+1] - ii[i];
1042       aj  = aij->j + ii[i];
1043       aa  = aij->a + ii[i];
1044       for (j=0; j<n; j++) {
1045         if (PetscAbsScalar(mask[*aj])) {
1046           if (b) bb[i] -= *aa*xx[*aj];
1047           *aa    = 0.0;
1048         }
1049         aa++;
1050         aj++;
1051       }
1052     }
1053   }
1054   if (x) {
1055     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1056     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1057   }
1058   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1059   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1060   ierr = PetscFree(lrows);CHKERRQ(ierr);
1061 
1062   /* wait on sends */
1063   if (nsends) {
1064     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1065     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1066     ierr = PetscFree(send_status);CHKERRQ(ierr);
1067   }
1068   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1069   ierr = PetscFree(svalues);CHKERRQ(ierr);
1070 
1071   PetscFunctionReturn(0);
1072 }
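/*
   Usage sketch (illustrative): symmetric elimination of Dirichlet rows, with x
   holding the prescribed values and b the right-hand side.  The routine zeroes
   both row r and column r for each listed r, puts diag on the diagonal, and
   subtracts the eliminated column entries times x from b (the mask loops
   above), so the operator stays symmetric.

     ierr = MatZeroRowsColumns(A,nrows,rows,diag,x,b);CHKERRQ(ierr);
*/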
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatMult_MPIAIJ"
1076 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1077 {
1078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1079   PetscErrorCode ierr;
1080   PetscInt       nt;
1081 
1082   PetscFunctionBegin;
1083   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1084   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1085   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1086   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1087   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
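/*
   The routine above overlaps communication with computation.  Writing x_own
   for the locally owned part of xx and x_ghost for the entries gathered into
   a->lvec, it computes

       yy = a->A * x_own + a->B * x_ghost,

   starting the scatter that fills a->lvec before, and finishing it after, the
   product with the diagonal block a->A, so ghost values travel while local
   work proceeds.
*/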
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1094 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1095 {
1096   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1106 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1107 {
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1121 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125   PetscBool      merged;
1126 
1127   PetscFunctionBegin;
1128   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1129   /* do nondiagonal part */
1130   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1131   if (!merged) {
1132     /* send it on its way */
1133     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134     /* do local part */
1135     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1136     /* receive remote parts: note this assumes the values are not actually */
1137     /* added in yy until the next line */
1138     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139   } else {
1140     /* do local part */
1141     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1142     /* send it on its way */
1143     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1144     /* values actually were received in the Begin() but we need to call this nop */
1145     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1146   }
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 EXTERN_C_BEGIN
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1153 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1154 {
1155   MPI_Comm       comm;
1156   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1157   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1158   IS             Me,Notme;
1159   PetscErrorCode ierr;
1160   PetscInt       M,N,first,last,*notme,i;
1161   PetscMPIInt    size;
1162 
1163   PetscFunctionBegin;
1164 
1165   /* Easy test: symmetric diagonal block */
1166   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1167   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1168   if (!*f) PetscFunctionReturn(0);
1169   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1170   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1171   if (size == 1) PetscFunctionReturn(0);
1172 
1173   /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
1174   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1175   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1176   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1177   for (i=0; i<first; i++) notme[i] = i;
1178   for (i=last; i<M; i++) notme[i-last+first] = i;
1179   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1180   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1181   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1182   Aoff = Aoffs[0];
1183   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1184   Boff = Boffs[0];
1185   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1187   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1189   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1190   ierr = PetscFree(notme);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 EXTERN_C_END
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1197 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   /* do nondiagonal part */
1204   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1205   /* send it on its way */
1206   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1207   /* do local part */
1208   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1209   /* receive remote parts */
1210   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 /*
1215   This only works correctly for square matrices where the subblock A->A is the
1216    diagonal block
1217 */
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1220 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1221 {
1222   PetscErrorCode ierr;
1223   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1224 
1225   PetscFunctionBegin;
1226   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1227   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1228   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatScale_MPIAIJ"
1234 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1235 {
1236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1241   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatDestroy_MPIAIJ"
1247 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1248 {
1249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253 #if defined(PETSC_USE_LOG)
1254   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1255 #endif
1256   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1257   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1259   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1260 #if defined (PETSC_USE_CTABLE)
1261   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1262 #else
1263   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1264 #endif
1265   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1266   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1267   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1268   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1269   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1270   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1271 
1272   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1286 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1287 {
1288   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1289   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1290   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1291   PetscErrorCode    ierr;
1292   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1293   int               fd;
1294   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1295   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1296   PetscScalar       *column_values;
1297   PetscInt          message_count,flowcontrolcount;
1298 
1299   PetscFunctionBegin;
1300   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1301   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1302   nz   = A->nz + B->nz;
1303   if (!rank) {
1304     header[0] = MAT_FILE_CLASSID;
1305     header[1] = mat->rmap->N;
1306     header[2] = mat->cmap->N;
1307     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1308     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1309     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1310     /* get largest number of rows any processor has */
1311     rlen = mat->rmap->n;
1312     range = mat->rmap->range;
1313     for (i=1; i<size; i++) {
1314       rlen = PetscMax(rlen,range[i+1] - range[i]);
1315     }
1316   } else {
1317     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1318     rlen = mat->rmap->n;
1319   }
1320 
1321   /* load up the local row counts */
1322   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1323   for (i=0; i<mat->rmap->n; i++) {
1324     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1325   }
1326 
1327   /* store the row lengths to the file */
1328   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1329   if (!rank) {
1330     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1331     for (i=1; i<size; i++) {
1332       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1333       rlen = range[i+1] - range[i];
1334       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     }
1337     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1338   } else {
1339     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1340     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1341     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1342   }
1343   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1344 
1345   /* load up the local column indices */
1346   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1347   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1348   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1349   cnt  = 0;
1350   for (i=0; i<mat->rmap->n; i++) {
1351     for (j=B->i[i]; j<B->i[i+1]; j++) {
1352       if ( (col = garray[B->j[j]]) > cstart) break;
1353       column_indices[cnt++] = col;
1354     }
1355     for (k=A->i[i]; k<A->i[i+1]; k++) {
1356       column_indices[cnt++] = A->j[k] + cstart;
1357     }
1358     for (; j<B->i[i+1]; j++) {
1359       column_indices[cnt++] = garray[B->j[j]];
1360     }
1361   }
1362   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1363 
1364   /* store the column indices to the file */
1365   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1366   if (!rank) {
1367     MPI_Status status;
1368     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1369     for (i=1; i<size; i++) {
1370       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1371       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1372       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1373       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1374       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1375     }
1376     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1377   } else {
1378     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1379     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1381     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1382   }
1383   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1384 
1385   /* load up the local column values */
1386   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1387   cnt  = 0;
1388   for (i=0; i<mat->rmap->n; i++) {
1389     for (j=B->i[i]; j<B->i[i+1]; j++) {
1390       if ( garray[B->j[j]] > cstart) break;
1391       column_values[cnt++] = B->a[j];
1392     }
1393     for (k=A->i[i]; k<A->i[i+1]; k++) {
1394       column_values[cnt++] = A->a[k];
1395     }
1396     for (; j<B->i[i+1]; j++) {
1397       column_values[cnt++] = B->a[j];
1398     }
1399   }
1400   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1401 
1402   /* store the column values to the file */
1403   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1404   if (!rank) {
1405     MPI_Status status;
1406     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1407     for (i=1; i<size; i++) {
1408       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1409       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1410       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1411       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1412       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1413     }
1414     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1415   } else {
1416     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1417     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1419     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1420   }
1421   ierr = PetscFree(column_values);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
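
/*
   Usage sketch (an illustrative addition, not part of the original source): the routine
   above is reached through MatView() on a binary viewer.  Process 0 writes a 4-entry
   header (MAT_FILE_CLASSID, M, N, global nonzero count), then every process's row
   lengths, global column indices, and values, received in rank order under flow control.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/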
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1427 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1428 {
1429   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1430   PetscErrorCode    ierr;
1431   PetscMPIInt       rank = aij->rank,size = aij->size;
1432   PetscBool         isdraw,iascii,isbinary;
1433   PetscViewer       sviewer;
1434   PetscViewerFormat format;
1435 
1436   PetscFunctionBegin;
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1439   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1440   if (iascii) {
1441     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1442     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1443       MatInfo    info;
1444       PetscInt   *inodes;
1445 
1446       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1447       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1448       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1449       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1450       if (!inodes) {
1451         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1452                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1453       } else {
1454         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1455                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1456       }
1457       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1459       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1461       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1464       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1465       PetscFunctionReturn(0);
1466     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1467       PetscInt   inodecount,inodelimit,*inodes;
1468       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1469       if (inodes) {
1470         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1471       } else {
1472         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1473       }
1474       PetscFunctionReturn(0);
1475     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1476       PetscFunctionReturn(0);
1477     }
1478   } else if (isbinary) {
1479     if (size == 1) {
1480       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1481       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1482     } else {
1483       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1484     }
1485     PetscFunctionReturn(0);
1486   } else if (isdraw) {
1487     PetscDraw  draw;
1488     PetscBool  isnull;
1489     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1490     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1491   }
1492 
1493   if (size == 1) {
1494     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1495     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1496   } else {
1497     /* assemble the entire matrix onto first processor. */
1498     Mat         A;
1499     Mat_SeqAIJ  *Aloc;
1500     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1501     MatScalar   *a;
1502 
1503     if (mat->rmap->N > 1024) {
1504       PetscBool  flg = PETSC_FALSE;
1505 
1506       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1507       if (!flg) {
1508         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1509       }
1510     }
1511 
1512     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1513     if (!rank) {
1514       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1517     }
1518     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1519     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1520     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1521     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1522     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1523 
1524     /* copy over the A part */
1525     Aloc = (Mat_SeqAIJ*)aij->A->data;
1526     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1527     row = mat->rmap->rstart;
1528     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1529     for (i=0; i<m; i++) {
1530       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1531       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1532     }
1533     aj = Aloc->j;
1534     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1535 
1536     /* copy over the B part */
1537     Aloc = (Mat_SeqAIJ*)aij->B->data;
1538     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1539     row  = mat->rmap->rstart;
1540     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1541     ct   = cols;
1542     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1543     for (i=0; i<m; i++) {
1544       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1545       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1546     }
1547     ierr = PetscFree(ct);CHKERRQ(ierr);
1548     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1549     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1550     /*
1551        Everyone must participate in the call that draws the matrix, since the graphics
1552        waits are synchronized across all processes that share the PetscDraw object
1553     */
1554     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1555     if (!rank) {
1556       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1557       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1558       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1559       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1560     }
1561     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1562     ierr = MatDestroy(&A);CHKERRQ(ierr);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatView_MPIAIJ"
1569 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1570 {
1571   PetscErrorCode ierr;
1572   PetscBool      iascii,isdraw,issocket,isbinary;
1573 
1574   PetscFunctionBegin;
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1577   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1578   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1579   if (iascii || isdraw || isbinary || issocket) {
1580     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1581   } else {
1582     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1583   }
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatSOR_MPIAIJ"
1589 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1590 {
1591   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1592   PetscErrorCode ierr;
1593   Vec            bb1 = 0;
1594   PetscBool      hasop;
1595 
1596   PetscFunctionBegin;
1597   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1598     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1599   }
1600 
1601   if (flag == SOR_APPLY_UPPER) {
1602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1603     PetscFunctionReturn(0);
1604   }
1605 
1606   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1607     if (flag & SOR_ZERO_INITIAL_GUESS) {
1608       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1609       its--;
1610     }
1611 
1612     while (its--) {
1613       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1615 
1616       /* update rhs: bb1 = bb - B*x */
1617       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1618       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1619 
1620       /* local sweep */
1621       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1622     }
1623   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1624     if (flag & SOR_ZERO_INITIAL_GUESS) {
1625       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1626       its--;
1627     }
1628     while (its--) {
1629       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1631 
1632       /* update rhs: bb1 = bb - B*x */
1633       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1634       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1635 
1636       /* local sweep */
1637       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1638     }
1639   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644     while (its--) {
1645       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647 
1648       /* update rhs: bb1 = bb - B*x */
1649       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1650       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1651 
1652       /* local sweep */
1653       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1654     }
1655   }  else if (flag & SOR_EISENSTAT) {
1656     Vec         xx1;
1657 
1658     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1659     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1660 
1661     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663     if (!mat->diag) {
1664       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1665       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1666     }
1667     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1668     if (hasop) {
1669       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1670     } else {
1671       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1672     }
1673     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1674 
1675     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1676 
1677     /* local sweep */
1678     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1679     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1680     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1681   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1682 
1683   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
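
/*
   Sketch (illustrative; the PCSOR calls below are the usual route into this routine):
   in parallel only the local sweeps and the Eisenstat variant are supported.  Each
   iteration scatters the off-process part of xx into mat->lvec, forms bb1 = bb - B*x_off,
   and then applies the sequential SOR kernel to the diagonal block A.

     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
     ierr = PCSORSetOmega(pc,1.5);CHKERRQ(ierr);
*/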
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatPermute_MPIAIJ"
1689 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1690 {
1691   MPI_Comm       comm;
1692   PetscInt       first,local_rowsize,local_colsize;
1693   const PetscInt *rows;
1694   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1695   PetscErrorCode ierr;
1696 
1697   PetscFunctionBegin;
1698   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1699   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1700   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1701   /* collect the global row permutation and invert it */
1702   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1703   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1704   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1705   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1706   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1707   /* get the local target indices */
1708   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1709   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1710   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1711   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1712   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1713   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1714   /* the column permutation is so much easier;
1715      make a local version of 'colp' and invert it */
1716   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1717   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1718   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1719   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1720   /* now we just get the submatrix */
1721   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1722   /* clean up */
1723   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1724   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1725   PetscFunctionReturn(0);
1726 }
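
/*
   Usage sketch (illustrative; n and permidx are hypothetical local permutation data):

     IS  rowp,colp;
     Mat Aperm;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,permidx,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISSetPermutation(rowp);CHKERRQ(ierr);
     ierr = ISDuplicate(rowp,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/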
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1730 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1731 {
1732   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1733   Mat            A = mat->A,B = mat->B;
1734   PetscErrorCode ierr;
1735   PetscReal      isend[5],irecv[5];
1736 
1737   PetscFunctionBegin;
1738   info->block_size     = 1.0;
1739   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1740   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1741   isend[3] = info->memory;  isend[4] = info->mallocs;
1742   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1743   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1744   isend[3] += info->memory;  isend[4] += info->mallocs;
1745   if (flag == MAT_LOCAL) {
1746     info->nz_used      = isend[0];
1747     info->nz_allocated = isend[1];
1748     info->nz_unneeded  = isend[2];
1749     info->memory       = isend[3];
1750     info->mallocs      = isend[4];
1751   } else if (flag == MAT_GLOBAL_MAX) {
1752     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1753     info->nz_used      = irecv[0];
1754     info->nz_allocated = irecv[1];
1755     info->nz_unneeded  = irecv[2];
1756     info->memory       = irecv[3];
1757     info->mallocs      = irecv[4];
1758   } else if (flag == MAT_GLOBAL_SUM) {
1759     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1760     info->nz_used      = irecv[0];
1761     info->nz_allocated = irecv[1];
1762     info->nz_unneeded  = irecv[2];
1763     info->memory       = irecv[3];
1764     info->mallocs      = irecv[4];
1765   }
1766   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1767   info->fill_ratio_needed = 0;
1768   info->factor_mallocs    = 0;
1769 
1770   PetscFunctionReturn(0);
1771 }
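
/*
   Usage sketch (illustrative): MAT_LOCAL returns the per-process totals assembled above,
   while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce the five statistics across the
   communicator with a max or a sum respectively.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  mallocs %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);
*/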
1772 
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatSetOption_MPIAIJ"
1775 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1776 {
1777   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1778   PetscErrorCode ierr;
1779 
1780   PetscFunctionBegin;
1781   switch (op) {
1782   case MAT_NEW_NONZERO_LOCATIONS:
1783   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1784   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1785   case MAT_KEEP_NONZERO_PATTERN:
1786   case MAT_NEW_NONZERO_LOCATION_ERR:
1787   case MAT_USE_INODES:
1788   case MAT_IGNORE_ZERO_ENTRIES:
1789     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1790     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1791     break;
1792   case MAT_ROW_ORIENTED:
1793     a->roworiented = flg;
1794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1795     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1796     break;
1797   case MAT_NEW_DIAGONALS:
1798     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1799     break;
1800   case MAT_IGNORE_OFF_PROC_ENTRIES:
1801     a->donotstash = flg;
1802     break;
1803   case MAT_SPD:
1804     A->spd_set                         = PETSC_TRUE;
1805     A->spd                             = flg;
1806     if (flg) {
1807       A->symmetric                     = PETSC_TRUE;
1808       A->structurally_symmetric        = PETSC_TRUE;
1809       A->symmetric_set                 = PETSC_TRUE;
1810       A->structurally_symmetric_set    = PETSC_TRUE;
1811     }
1812     break;
1813   case MAT_SYMMETRIC:
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_STRUCTURALLY_SYMMETRIC:
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_HERMITIAN:
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     break;
1822   case MAT_SYMMETRY_ETERNAL:
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     break;
1825   default:
1826     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1827   }
1828   PetscFunctionReturn(0);
1829 }
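
/*
   Usage sketch (illustrative): most options are forwarded to both the diagonal block
   a->A and the off-diagonal block a->B; MAT_IGNORE_OFF_PROC_ENTRIES sets a->donotstash,
   so entries destined for other processes are dropped by MatSetValues() rather than
   stashed and communicated.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/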
1830 
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatGetRow_MPIAIJ"
1833 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1834 {
1835   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1836   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1837   PetscErrorCode ierr;
1838   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1839   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1840   PetscInt       *cmap,*idx_p;
1841 
1842   PetscFunctionBegin;
1843   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() already active; call MatRestoreRow() first");
1844   mat->getrowactive = PETSC_TRUE;
1845 
1846   if (!mat->rowvalues && (idx || v)) {
1847     /*
1848         allocate enough space to hold information from the longest row.
1849     */
1850     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1851     PetscInt   max = 1,tmp;
1852     for (i=0; i<matin->rmap->n; i++) {
1853       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1854       if (max < tmp) { max = tmp; }
1855     }
1856     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1857   }
1858 
1859   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1860   lrow = row - rstart;
1861 
1862   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1863   if (!v)   {pvA = 0; pvB = 0;}
1864   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1865   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1866   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1867   nztot = nzA + nzB;
1868 
1869   cmap  = mat->garray;
1870   if (v  || idx) {
1871     if (nztot) {
1872       /* Sort by increasing column numbers, assuming A and B already sorted */
1873       PetscInt imark = -1;
1874       if (v) {
1875         *v = v_p = mat->rowvalues;
1876         for (i=0; i<nzB; i++) {
1877           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1878           else break;
1879         }
1880         imark = i;
1881         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1882         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1883       }
1884       if (idx) {
1885         *idx = idx_p = mat->rowindices;
1886         if (imark > -1) {
1887           for (i=0; i<imark; i++) {
1888             idx_p[i] = cmap[cworkB[i]];
1889           }
1890         } else {
1891           for (i=0; i<nzB; i++) {
1892             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1893             else break;
1894           }
1895           imark = i;
1896         }
1897         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1898         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1899       }
1900     } else {
1901       if (idx) *idx = 0;
1902       if (v)   *v   = 0;
1903     }
1904   }
1905   *nz = nztot;
1906   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1907   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1913 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1914 {
1915   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1916 
1917   PetscFunctionBegin;
1918   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1919   aij->getrowactive = PETSC_FALSE;
1920   PetscFunctionReturn(0);
1921 }
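
/*
   Usage sketch (illustrative): MatGetRow() may be called only for locally owned rows and
   must be paired with MatRestoreRow(), since the routines above share per-matrix scratch
   space (getrowactive, rowvalues, rowindices).

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...examine ncols, cols[], vals[]...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/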
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatNorm_MPIAIJ"
1925 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1926 {
1927   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1928   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1929   PetscErrorCode ierr;
1930   PetscInt       i,j,cstart = mat->cmap->rstart;
1931   PetscReal      sum = 0.0;
1932   MatScalar      *v;
1933 
1934   PetscFunctionBegin;
1935   if (aij->size == 1) {
1936     ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1937   } else {
1938     if (type == NORM_FROBENIUS) {
1939       v = amat->a;
1940       for (i=0; i<amat->nz; i++) {
1941 #if defined(PETSC_USE_COMPLEX)
1942         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1943 #else
1944         sum += (*v)*(*v); v++;
1945 #endif
1946       }
1947       v = bmat->a;
1948       for (i=0; i<bmat->nz; i++) {
1949 #if defined(PETSC_USE_COMPLEX)
1950         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1951 #else
1952         sum += (*v)*(*v); v++;
1953 #endif
1954       }
1955       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1956       *norm = PetscSqrtReal(*norm);
1957     } else if (type == NORM_1) { /* max column norm */
1958       PetscReal *tmp,*tmp2;
1959       PetscInt  *jj,*garray = aij->garray;
1960       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1961       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1962       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1963       *norm = 0.0;
1964       v = amat->a; jj = amat->j;
1965       for (j=0; j<amat->nz; j++) {
1966         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1967       }
1968       v = bmat->a; jj = bmat->j;
1969       for (j=0; j<bmat->nz; j++) {
1970         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1971       }
1972       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1973       for (j=0; j<mat->cmap->N; j++) {
1974         if (tmp2[j] > *norm) *norm = tmp2[j];
1975       }
1976       ierr = PetscFree(tmp);CHKERRQ(ierr);
1977       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1978     } else if (type == NORM_INFINITY) { /* max row norm */
1979       PetscReal ntemp = 0.0;
1980       for (j=0; j<aij->A->rmap->n; j++) {
1981         v = amat->a + amat->i[j];
1982         sum = 0.0;
1983         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1984           sum += PetscAbsScalar(*v); v++;
1985         }
1986         v = bmat->a + bmat->i[j];
1987         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         if (sum > ntemp) ntemp = sum;
1991       }
1992       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1993     } else {
1994       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1995     }
1996   }
1997   PetscFunctionReturn(0);
1998 }
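
/*
   The norms computed above (a sketch of the formulas):
     NORM_FROBENIUS: ||A||_F   = sqrt(sum_{i,j} |a_ij|^2)  - partial sums added across processes
     NORM_1:         ||A||_1   = max_j sum_i |a_ij|        - column sums summed, then the max taken
     NORM_INFINITY:  ||A||_inf = max_i sum_j |a_ij|        - row sums are local, the max is reduced
   For example:  ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/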
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatTranspose_MPIAIJ"
2002 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2003 {
2004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2005   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2006   PetscErrorCode ierr;
2007   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2008   PetscInt       cstart=A->cmap->rstart,ncol;
2009   Mat            B;
2010   MatScalar      *array;
2011 
2012   PetscFunctionBegin;
2013   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"In-place transpose is supported only for square matrices");
2014 
2015   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2016   ai = Aloc->i; aj = Aloc->j;
2017   bi = Bloc->i; bj = Bloc->j;
2018   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2019     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2020     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2021     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2022     for (i=0; i<ai[ma]; i++){
2023       d_nnz[aj[i]]++;
2024       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2025     }
2026 
2027     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2028     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2029     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2030     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2031     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2032     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2033   } else {
2034     B = *matout;
2035   }
2036 
2037   /* copy over the A part */
2038   array = Aloc->a;
2039   row = A->rmap->rstart;
2040   for (i=0; i<ma; i++) {
2041     ncol = ai[i+1]-ai[i];
2042     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2043     row++; array += ncol; aj += ncol;
2044   }
2045   aj = Aloc->j;
2046   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2047 
2048   /* copy over the B part */
2049   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2050   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2051   array = Bloc->a;
2052   row = A->rmap->rstart;
2053   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2054   cols_tmp = cols;
2055   for (i=0; i<mb; i++) {
2056     ncol = bi[i+1]-bi[i];
2057     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2058     row++; array += ncol; cols_tmp += ncol;
2059   }
2060   ierr = PetscFree(cols);CHKERRQ(ierr);
2061 
2062   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2063   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2065     *matout = B;
2066   } else {
2067     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2068   }
2069   PetscFunctionReturn(0);
2070 }
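
/*
   Usage sketch (illustrative): both call forms route here; the in-place form requires a
   square matrix, as checked above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    out-of-place
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);       in-place, square A only
*/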
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2074 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2075 {
2076   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2077   Mat            a = aij->A,b = aij->B;
2078   PetscErrorCode ierr;
2079   PetscInt       s1,s2,s3;
2080 
2081   PetscFunctionBegin;
2082   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2083   if (rr) {
2084     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2085     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2086     /* Overlap communication with computation. */
2087     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2088   }
2089   if (ll) {
2090     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2091     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2092     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2093   }
2094   /* scale  the diagonal block */
2095   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2096 
2097   if (rr) {
2098     /* Do a scatter end and then right scale the off-diagonal block */
2099     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2100     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2101   }
2102 
2103   PetscFunctionReturn(0);
2104 }
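
/*
   The routine above computes A = diag(ll)*A*diag(rr), overlapping the scatter of rr with
   the left scaling; either vector may be PETSC_NULL to skip that side.  Sketch (l is a
   hypothetical vector conforming to the row layout):

     ierr = MatDiagonalScale(A,l,PETSC_NULL);CHKERRQ(ierr);
*/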
2105 
2106 #undef __FUNCT__
2107 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2108 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2109 {
2110   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2111   PetscErrorCode ierr;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2115   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2116   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2117   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 #undef __FUNCT__
2121 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2122 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2123 {
2124   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatEqual_MPIAIJ"
2134 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2135 {
2136   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2137   Mat            a,b,c,d;
2138   PetscBool      flg;
2139   PetscErrorCode ierr;
2140 
2141   PetscFunctionBegin;
2142   a = matA->A; b = matA->B;
2143   c = matB->A; d = matB->B;
2144 
2145   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2146   if (flg) {
2147     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2148   }
2149   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatCopy_MPIAIJ"
2155 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2156 {
2157   PetscErrorCode ierr;
2158   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2159   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2160 
2161   PetscFunctionBegin;
2162   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2163   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2164     /* Because of the column compression in the off-process part of the matrix a->B,
2165        the number of columns in a->B and b->B may differ, hence we cannot call
2166        MatCopy() directly on the two parts. If need be, a copy more efficient than
2167        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2168        and then copying the submatrices */
2169     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2170   } else {
2171     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2172     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2173   }
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatSetUp_MPIAIJ"
2179 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2180 {
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatAXPY_MPIAIJ"
2190 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2191 {
2192   PetscErrorCode ierr;
2193   PetscInt       i;
2194   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2195   PetscBLASInt   bnz,one=1;
2196   Mat_SeqAIJ     *x,*y;
2197 
2198   PetscFunctionBegin;
2199   if (str == SAME_NONZERO_PATTERN) {
2200     PetscScalar alpha = a;
2201     x = (Mat_SeqAIJ *)xx->A->data;
2202     y = (Mat_SeqAIJ *)yy->A->data;
2203     bnz = PetscBLASIntCast(x->nz);
2204     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2205     x = (Mat_SeqAIJ *)xx->B->data;
2206     y = (Mat_SeqAIJ *)yy->B->data;
2207     bnz = PetscBLASIntCast(x->nz);
2208     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2209   } else if (str == SUBSET_NONZERO_PATTERN) {
2210     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2211 
2212     x = (Mat_SeqAIJ *)xx->B->data;
2213     y = (Mat_SeqAIJ *)yy->B->data;
2214     if (y->xtoy && y->XtoY != xx->B) {
2215       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2216       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2217     }
2218     if (!y->xtoy) { /* get xtoy */
2219       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2220       y->XtoY = xx->B;
2221       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2222     }
2223     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2224   } else {
2225     Mat B;
2226     PetscInt *nnz_d,*nnz_o;
2227     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2228     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2229     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2230     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2231     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2232     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2233     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2234     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2235     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2236     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2237     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2238     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2239     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2240   }
2241   PetscFunctionReturn(0);
2242 }
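
/*
   Usage sketch (illustrative): Y = Y + a*X.  SAME_NONZERO_PATTERN does a direct BLAS axpy
   on the stored values, SUBSET_NONZERO_PATTERN maps X's nonzeros into Y through the
   cached xtoy index, and any other pattern rebuilds Y with merged preallocation.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/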
2243 
2244 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatConjugate_MPIAIJ"
2248 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2249 {
2250 #if defined(PETSC_USE_COMPLEX)
2251   PetscErrorCode ierr;
2252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2253 
2254   PetscFunctionBegin;
2255   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2256   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2257 #else
2258   PetscFunctionBegin;
2259 #endif
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatRealPart_MPIAIJ"
2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266 {
2267   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2272   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #if defined(PETSC_HAVE_PBGL)
2290 
2291 #include <boost/parallel/mpi/bsp_process_group.hpp>
2292 #include <boost/graph/distributed/ilu_default_graph.hpp>
2293 #include <boost/graph/distributed/ilu_0_block.hpp>
2294 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295 #include <boost/graph/distributed/petsc/interface.hpp>
2296 #include <boost/multi_array.hpp>
2297 #include <boost/parallel/distributed_property_map.hpp>
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2301 /*
2302   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303 */
2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305 {
2306   namespace petsc = boost::distributed::petsc;
2307 
2308   namespace graph_dist = boost::graph::distributed;
2309   using boost::graph::distributed::ilu_default::process_group_type;
2310   using boost::graph::ilu_permuted;
2311 
2312   PetscBool       row_identity, col_identity;
2313   PetscContainer  c;
2314   PetscInt        m, n, M, N;
2315   PetscErrorCode  ierr;
2316 
2317   PetscFunctionBegin;
2318   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2320   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2321   if (!row_identity || !col_identity) {
2322     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2323   }
2324 
2325   process_group_type pg;
2326   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2327   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2328   lgraph_type&   level_graph = *lgraph_p;
2329   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2330 
2331   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2332   ilu_permuted(level_graph);
2333 
2334   /* put together the new matrix */
2335   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2336   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2337   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2338   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2339   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2340   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342 
2343   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2344   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2345   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2346   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2352 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2353 {
2354   PetscFunctionBegin;
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatSolve_MPIAIJ"
2360 /*
2361   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2362 */
2363 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2364 {
2365   namespace graph_dist = boost::graph::distributed;
2366 
2367   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2368   lgraph_type*   lgraph_p;
2369   PetscContainer c;
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2374   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2375   ierr = VecCopy(b, x);CHKERRQ(ierr);
2376 
2377   PetscScalar* array_x;
2378   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2379   PetscInt sx;
2380   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2381 
2382   PetscScalar* array_b;
2383   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2384   PetscInt sb;
2385   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2386 
2387   lgraph_type&   level_graph = *lgraph_p;
2388   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2389 
2390   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2391   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2392                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2393 
2394   typedef boost::iterator_property_map<array_ref_type::iterator,
2395                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2396   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2397                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2398 
2399   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2400 
2401   PetscFunctionReturn(0);
2402 }
2403 #endif
2404 
2405 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2406   PetscInt       nzlocal,nsends,nrecvs;
2407   PetscMPIInt    *send_rank,*recv_rank;
2408   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2409   PetscScalar    *sbuf_a,**rbuf_a;
2410   PetscErrorCode (*Destroy)(Mat);
2411 } Mat_Redundant;
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2415 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2416 {
2417   PetscErrorCode       ierr;
2418   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2419   PetscInt             i;
2420 
2421   PetscFunctionBegin;
2422   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2423   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2424   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2425   for (i=0; i<redund->nrecvs; i++){
2426     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2427     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2428   }
2429   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2430   ierr = PetscFree(redund);CHKERRQ(ierr);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatDestroy_MatRedundant"
2436 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2437 {
2438   PetscErrorCode  ierr;
2439   PetscContainer  container;
2440   Mat_Redundant   *redund=PETSC_NULL;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2444   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2445   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2446   A->ops->destroy = redund->Destroy;
2447   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2448   if (A->ops->destroy) {
2449     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2456 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2457 {
2458   PetscMPIInt    rank,size;
2459   MPI_Comm       comm=((PetscObject)mat)->comm;
2460   PetscErrorCode ierr;
2461   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2462   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2463   PetscInt       *rowrange=mat->rmap->range;
2464   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2465   Mat            A=aij->A,B=aij->B,C=*matredundant;
2466   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2467   PetscScalar    *sbuf_a;
2468   PetscInt       nzlocal=a->nz+b->nz;
2469   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2470   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2471   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2472   MatScalar      *aworkA,*aworkB;
2473   PetscScalar    *vals;
2474   PetscMPIInt    tag1,tag2,tag3,imdex;
2475   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2476                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2477   MPI_Status     recv_status,*send_status;
2478   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2479   PetscInt       **rbuf_j=PETSC_NULL;
2480   PetscScalar    **rbuf_a=PETSC_NULL;
2481   Mat_Redundant  *redund=PETSC_NULL;
2482   PetscContainer container;
2483 
2484   PetscFunctionBegin;
2485   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2486   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2487 
2488   if (reuse == MAT_REUSE_MATRIX) {
2489     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2490     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2491     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2492     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2493     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2494     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2495     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2496     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2497 
2498     nsends    = redund->nsends;
2499     nrecvs    = redund->nrecvs;
2500     send_rank = redund->send_rank;
2501     recv_rank = redund->recv_rank;
2502     sbuf_nz   = redund->sbuf_nz;
2503     rbuf_nz   = redund->rbuf_nz;
2504     sbuf_j    = redund->sbuf_j;
2505     sbuf_a    = redund->sbuf_a;
2506     rbuf_j    = redund->rbuf_j;
2507     rbuf_a    = redund->rbuf_a;
2508   }
2509 
2510   if (reuse == MAT_INITIAL_MATRIX){
2511     PetscMPIInt  subrank,subsize;
2512     PetscInt     nleftover,np_subcomm;
2513     /* determine the destination ranks (send_rank) and the counts nsends and nrecvs */
2514     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2515     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2516     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2517     np_subcomm = size/nsubcomm;
2518     nleftover  = size - nsubcomm*np_subcomm;
2519     nsends = 0; nrecvs = 0;
2520     for (i=0; i<size; i++){ /* i = rank */
2521       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2522         send_rank[nsends] = i; nsends++;
2523         recv_rank[nrecvs++] = i;
2524       }
2525     }
2526     if (rank >= size - nleftover){/* this proc is a leftover processor */
2527       i = size-nleftover-1;
2528       j = 0;
2529       while (j < nsubcomm - nleftover){
2530         send_rank[nsends++] = i;
2531         i--; j++;
2532       }
2533     }
2534 
2535     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2536       for (i=0; i<nleftover; i++){
2537         recv_rank[nrecvs++] = size-nleftover+i;
2538       }
2539     }
2540 
2541     /* allocate sbuf_j, sbuf_a */
2542     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2543     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2544     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2545   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2546 
2547   /* copy mat's local entries into the buffers */
2548   if (reuse == MAT_INITIAL_MATRIX){
2549     rownz_max = 0;
2550     rptr = sbuf_j;
2551     cols = sbuf_j + rend-rstart + 1;
2552     vals = sbuf_a;
2553     rptr[0] = 0;
2554     for (i=0; i<rend-rstart; i++){
2555       row = i + rstart;
2556       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2557       ncols  = nzA + nzB;
2558       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2559       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2560       /* load the column indices for this row into cols */
2561       lwrite = 0;
2562       for (l=0; l<nzB; l++) {
2563         if ((ctmp = bmap[cworkB[l]]) < cstart){
2564           vals[lwrite]   = aworkB[l];
2565           cols[lwrite++] = ctmp;
2566         }
2567       }
2568       for (l=0; l<nzA; l++){
2569         vals[lwrite]   = aworkA[l];
2570         cols[lwrite++] = cstart + cworkA[l];
2571       }
2572       for (l=0; l<nzB; l++) {
2573         if ((ctmp = bmap[cworkB[l]]) >= cend){
2574           vals[lwrite]   = aworkB[l];
2575           cols[lwrite++] = ctmp;
2576         }
2577       }
2578       vals += ncols;
2579       cols += ncols;
2580       rptr[i+1] = rptr[i] + ncols;
2581       if (rownz_max < ncols) rownz_max = ncols;
2582     }
2583     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2584   } else { /* only copy matrix values into sbuf_a */
2585     rptr = sbuf_j;
2586     vals = sbuf_a;
2587     rptr[0] = 0;
2588     for (i=0; i<rend-rstart; i++){
2589       row = i + rstart;
2590       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2591       ncols  = nzA + nzB;
2592       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2593       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2594       lwrite = 0;
2595       for (l=0; l<nzB; l++) {
2596         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2597       }
2598       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2599       for (l=0; l<nzB; l++) {
2600         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2601       }
2602       vals += ncols;
2603       rptr[i+1] = rptr[i] + ncols;
2604     }
2605   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2606 
2607   /* send nzlocal to others, and recv the others' nzlocal */
2608   /*--------------------------------------------------*/
2609   if (reuse == MAT_INITIAL_MATRIX){
2610     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2611     s_waits2 = s_waits3 + nsends;
2612     s_waits1 = s_waits2 + nsends;
2613     r_waits1 = s_waits1 + nsends;
2614     r_waits2 = r_waits1 + nrecvs;
2615     r_waits3 = r_waits2 + nrecvs;
2616   } else {
2617     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2618     r_waits3 = s_waits3 + nsends;
2619   }
2620 
2621   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2622   if (reuse == MAT_INITIAL_MATRIX){
2623     /* get new tags to keep the communication clean */
2624     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2625     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2626     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2627 
2628     /* post receives of others' nzlocal */
2629     for (i=0; i<nrecvs; i++){
2630       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2631     }
2632     /* send nzlocal to others */
2633     for (i=0; i<nsends; i++){
2634       sbuf_nz[i] = nzlocal;
2635       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2636     }
2637     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2638     count = nrecvs;
2639     while (count) {
2640       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2641       recv_rank[imdex] = recv_status.MPI_SOURCE;
2642       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2643       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2644 
2645       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2646       rbuf_nz[imdex] += i + 2;
2647       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2648       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2649       count--;
2650     }
2651     /* wait on sends of nzlocal */
2652     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2653     /* send mat->i,j to others, and recv from others */
2654     /*------------------------------------------------*/
2655     for (i=0; i<nsends; i++){
2656       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2657       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2658     }
2659     /* wait on receives of mat->i,j */
2660     /*------------------------------*/
2661     count = nrecvs;
2662     while (count) {
2663       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2664       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2665       count--;
2666     }
2667     /* wait on sends of mat->i,j */
2668     /*---------------------------*/
2669     if (nsends) {
2670       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2671     }
2672   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2673 
2674   /* post receives, send and receive mat->a */
2675   /*----------------------------------------*/
2676   for (imdex=0; imdex<nrecvs; imdex++) {
2677     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2678   }
2679   for (i=0; i<nsends; i++){
2680     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2681   }
2682   count = nrecvs;
2683   while (count) {
2684     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2685     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2686     count--;
2687   }
2688   if (nsends) {
2689     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2690   }
2691 
2692   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2693 
2694   /* create redundant matrix */
2695   /*-------------------------*/
2696   if (reuse == MAT_INITIAL_MATRIX){
2697     /* compute rownz_max for preallocation */
2698     for (imdex=0; imdex<nrecvs; imdex++){
2699       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2700       rptr = rbuf_j[imdex];
2701       for (i=0; i<j; i++){
2702         ncols = rptr[i+1] - rptr[i];
2703         if (rownz_max < ncols) rownz_max = ncols;
2704       }
2705     }
2706 
2707     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2708     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2709     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2710     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2711     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2712   } else {
2713     C = *matredundant;
2714   }
2715 
2716   /* insert local matrix entries */
2717   rptr = sbuf_j;
2718   cols = sbuf_j + rend-rstart + 1;
2719   vals = sbuf_a;
2720   for (i=0; i<rend-rstart; i++){
2721     row   = i + rstart;
2722     ncols = rptr[i+1] - rptr[i];
2723     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2724     vals += ncols;
2725     cols += ncols;
2726   }
2727   /* insert received matrix entries */
2728   for (imdex=0; imdex<nrecvs; imdex++){
2729     rstart = rowrange[recv_rank[imdex]];
2730     rend   = rowrange[recv_rank[imdex]+1];
2731     rptr = rbuf_j[imdex];
2732     cols = rbuf_j[imdex] + rend-rstart + 1;
2733     vals = rbuf_a[imdex];
2734     for (i=0; i<rend-rstart; i++){
2735       row   = i + rstart;
2736       ncols = rptr[i+1] - rptr[i];
2737       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2738       vals += ncols;
2739       cols += ncols;
2740     }
2741   }
2742   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2743   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2744   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2745   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2746   if (reuse == MAT_INITIAL_MATRIX) {
2747     PetscContainer container;
2748     *matredundant = C;
2749     /* create a supporting struct and attach it to C for reuse */
2750     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2751     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2752     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2753     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2754     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2755     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2756 
2757     redund->nzlocal = nzlocal;
2758     redund->nsends  = nsends;
2759     redund->nrecvs  = nrecvs;
2760     redund->send_rank = send_rank;
2761     redund->recv_rank = recv_rank;
2762     redund->sbuf_nz = sbuf_nz;
2763     redund->rbuf_nz = rbuf_nz;
2764     redund->sbuf_j  = sbuf_j;
2765     redund->sbuf_a  = sbuf_a;
2766     redund->rbuf_j  = rbuf_j;
2767     redund->rbuf_a  = rbuf_a;
2768 
2769     redund->Destroy = C->ops->destroy;
2770     C->ops->destroy = MatDestroy_MatRedundant;
2771   }
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 #undef __FUNCT__
2776 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2777 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2778 {
2779   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2780   PetscErrorCode ierr;
2781   PetscInt       i,*idxb = 0;
2782   PetscScalar    *va,*vb;
2783   Vec            vtmp;
2784 
2785   PetscFunctionBegin;
2786   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2787   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2788   if (idx) {
2789     for (i=0; i<A->rmap->n; i++) {
2790       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2791     }
2792   }
2793 
2794   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2795   if (idx) {
2796     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2797   }
2798   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2799   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2800 
2801   for (i=0; i<A->rmap->n; i++){
2802     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2803       va[i] = vb[i];
2804       if (idx) idx[i] = a->garray[idxb[i]];
2805     }
2806   }
2807 
2808   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2809   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2810   ierr = PetscFree(idxb);CHKERRQ(ierr);
2811   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2817 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2818 {
2819   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2820   PetscErrorCode ierr;
2821   PetscInt       i,*idxb = 0;
2822   PetscScalar    *va,*vb;
2823   Vec            vtmp;
2824 
2825   PetscFunctionBegin;
2826   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2827   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2828   if (idx) {
2829     for (i=0; i<A->rmap->n; i++) {
2830       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2831     }
2832   }
2833 
2834   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2835   if (idx) {
2836     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2837   }
2838   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2839   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2840 
2841   for (i=0; i<A->rmap->n; i++){
2842     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2843       va[i] = vb[i];
2844       if (idx) idx[i] = a->garray[idxb[i]];
2845     }
2846   }
2847 
2848   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2849   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2850   ierr = PetscFree(idxb);CHKERRQ(ierr);
2851   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
2854 
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2857 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2858 {
2859   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2860   PetscInt       n      = A->rmap->n;
2861   PetscInt       cstart = A->cmap->rstart;
2862   PetscInt      *cmap   = mat->garray;
2863   PetscInt      *diagIdx, *offdiagIdx;
2864   Vec            diagV, offdiagV;
2865   PetscScalar   *a, *diagA, *offdiagA;
2866   PetscInt       r;
2867   PetscErrorCode ierr;
2868 
2869   PetscFunctionBegin;
2870   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2871   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2872   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2873   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2874   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2875   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2876   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2877   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2878   for(r = 0; r < n; ++r) {
2879     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2880       a[r]   = diagA[r];
2881       idx[r] = cstart + diagIdx[r];
2882     } else {
2883       a[r]   = offdiagA[r];
2884       idx[r] = cmap[offdiagIdx[r]];
2885     }
2886   }
2887   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2888   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2889   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2890   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2891   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2892   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2893   PetscFunctionReturn(0);
2894 }
2895 
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2898 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2899 {
2900   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2901   PetscInt       n      = A->rmap->n;
2902   PetscInt       cstart = A->cmap->rstart;
2903   PetscInt      *cmap   = mat->garray;
2904   PetscInt      *diagIdx, *offdiagIdx;
2905   Vec            diagV, offdiagV;
2906   PetscScalar   *a, *diagA, *offdiagA;
2907   PetscInt       r;
2908   PetscErrorCode ierr;
2909 
2910   PetscFunctionBegin;
2911   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2912   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2913   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2914   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2915   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2916   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2917   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2918   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2919   for(r = 0; r < n; ++r) {
2920     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2921       a[r]   = diagA[r];
2922       idx[r] = cstart + diagIdx[r];
2923     } else {
2924       a[r]   = offdiagA[r];
2925       idx[r] = cmap[offdiagIdx[r]];
2926     }
2927   }
2928   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2929   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2930   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2931   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2932   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2933   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2934   PetscFunctionReturn(0);
2935 }
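/*
   Usage sketch (illustrative only, not part of the library source): the four
   routines above implement MatGetRowMax(), MatGetRowMin(), MatGetRowMaxAbs()
   and MatGetRowMinAbs() for MPIAIJ.  A caller supplies a vector with the same
   row layout as the matrix and, optionally, an array that receives the global
   column index of the extremal entry in each local row.  Assuming an
   assembled MPIAIJ matrix A:

      Vec      rowmax;
      PetscInt m,*loc;

      ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetVecs(A,PETSC_NULL,&rowmax);CHKERRQ(ierr);   left vector, same row layout as A
      ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
      ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
      ierr = PetscFree(loc);CHKERRQ(ierr);
      ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/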
2936 
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2939 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2940 {
2941   PetscErrorCode ierr;
2942   Mat            *dummy;
2943 
2944   PetscFunctionBegin;
2945   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2946   *newmat = *dummy;
2947   ierr = PetscFree(dummy);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2952 
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2955 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2956 {
2957   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2958   PetscErrorCode ierr;
2959 
2960   PetscFunctionBegin;
2961   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 
2966 /* -------------------------------------------------------------------*/
2967 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2968        MatGetRow_MPIAIJ,
2969        MatRestoreRow_MPIAIJ,
2970        MatMult_MPIAIJ,
2971 /* 4*/ MatMultAdd_MPIAIJ,
2972        MatMultTranspose_MPIAIJ,
2973        MatMultTransposeAdd_MPIAIJ,
2974 #ifdef PETSC_HAVE_PBGL
2975        MatSolve_MPIAIJ,
2976 #else
2977        0,
2978 #endif
2979        0,
2980        0,
2981 /*10*/ 0,
2982        0,
2983        0,
2984        MatSOR_MPIAIJ,
2985        MatTranspose_MPIAIJ,
2986 /*15*/ MatGetInfo_MPIAIJ,
2987        MatEqual_MPIAIJ,
2988        MatGetDiagonal_MPIAIJ,
2989        MatDiagonalScale_MPIAIJ,
2990        MatNorm_MPIAIJ,
2991 /*20*/ MatAssemblyBegin_MPIAIJ,
2992        MatAssemblyEnd_MPIAIJ,
2993        MatSetOption_MPIAIJ,
2994        MatZeroEntries_MPIAIJ,
2995 /*24*/ MatZeroRows_MPIAIJ,
2996        0,
2997 #ifdef PETSC_HAVE_PBGL
2998        0,
2999 #else
3000        0,
3001 #endif
3002        0,
3003        0,
3004 /*29*/ MatSetUp_MPIAIJ,
3005 #ifdef PETSC_HAVE_PBGL
3006        0,
3007 #else
3008        0,
3009 #endif
3010        0,
3011        0,
3012        0,
3013 /*34*/ MatDuplicate_MPIAIJ,
3014        0,
3015        0,
3016        0,
3017        0,
3018 /*39*/ MatAXPY_MPIAIJ,
3019        MatGetSubMatrices_MPIAIJ,
3020        MatIncreaseOverlap_MPIAIJ,
3021        MatGetValues_MPIAIJ,
3022        MatCopy_MPIAIJ,
3023 /*44*/ MatGetRowMax_MPIAIJ,
3024        MatScale_MPIAIJ,
3025        0,
3026        0,
3027        MatZeroRowsColumns_MPIAIJ,
3028 /*49*/ MatSetBlockSize_MPIAIJ,
3029        0,
3030        0,
3031        0,
3032        0,
3033 /*54*/ MatFDColoringCreate_MPIAIJ,
3034        0,
3035        MatSetUnfactored_MPIAIJ,
3036        MatPermute_MPIAIJ,
3037        0,
3038 /*59*/ MatGetSubMatrix_MPIAIJ,
3039        MatDestroy_MPIAIJ,
3040        MatView_MPIAIJ,
3041        0,
3042        0,
3043 /*64*/ 0,
3044        0,
3045        0,
3046        0,
3047        0,
3048 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3049        MatGetRowMinAbs_MPIAIJ,
3050        0,
3051        MatSetColoring_MPIAIJ,
3052 #if defined(PETSC_HAVE_ADIC)
3053        MatSetValuesAdic_MPIAIJ,
3054 #else
3055        0,
3056 #endif
3057        MatSetValuesAdifor_MPIAIJ,
3058 /*75*/ MatFDColoringApply_AIJ,
3059        0,
3060        0,
3061        0,
3062        0,
3063 /*80*/ 0,
3064        0,
3065        0,
3066 /*83*/ MatLoad_MPIAIJ,
3067        0,
3068        0,
3069        0,
3070        0,
3071        0,
3072 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3073        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3074        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3075        MatPtAP_Basic,
3076        MatPtAPSymbolic_MPIAIJ,
3077 /*94*/ MatPtAPNumeric_MPIAIJ,
3078        0,
3079        0,
3080        0,
3081        0,
3082 /*99*/ 0,
3083        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3084        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3085        MatConjugate_MPIAIJ,
3086        0,
3087 /*104*/MatSetValuesRow_MPIAIJ,
3088        MatRealPart_MPIAIJ,
3089        MatImaginaryPart_MPIAIJ,
3090        0,
3091        0,
3092 /*109*/0,
3093        MatGetRedundantMatrix_MPIAIJ,
3094        MatGetRowMin_MPIAIJ,
3095        0,
3096        0,
3097 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3098        0,
3099        0,
3100        0,
3101        0,
3102 /*119*/0,
3103        0,
3104        0,
3105        0,
3106        MatGetMultiProcBlock_MPIAIJ,
3107 /*124*/MatFindNonZeroRows_MPIAIJ,
3108        MatGetColumnNorms_MPIAIJ,
3109        MatInvertBlockDiagonal_MPIAIJ,
3110        0,
3111        MatGetSubMatricesParallel_MPIAIJ,
3112 /*129*/0,
3113        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3114        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3115        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3116        0,
3117 /*134*/0,
3118        0,
3119        0,
3120        0,
3121        0
3122 };
3123 
3124 /* ----------------------------------------------------------------------------------------*/
3125 
3126 EXTERN_C_BEGIN
3127 #undef __FUNCT__
3128 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3129 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3130 {
3131   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3132   PetscErrorCode ierr;
3133 
3134   PetscFunctionBegin;
3135   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3136   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3137   PetscFunctionReturn(0);
3138 }
3139 EXTERN_C_END
3140 
3141 EXTERN_C_BEGIN
3142 #undef __FUNCT__
3143 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3144 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3145 {
3146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3147   PetscErrorCode ierr;
3148 
3149   PetscFunctionBegin;
3150   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3151   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3152   PetscFunctionReturn(0);
3153 }
3154 EXTERN_C_END
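/*
   Usage sketch (illustrative only, not part of the library source):
   MatStoreValues()/MatRetrieveValues() dispatch to the two routines above for
   MPIAIJ.  They support repeatedly refilling a matrix whose nonzero pattern
   is fixed, e.g. reusing the constant (linear) part of a Jacobian.  Once the
   matrix has been assembled with its final nonzero structure:

      ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
      ....
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);       restore the stashed values
      ....  add the terms that change, then reassemble  ....
*/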
3155 
3156 EXTERN_C_BEGIN
3157 #undef __FUNCT__
3158 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3159 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3160 {
3161   Mat_MPIAIJ     *b;
3162   PetscErrorCode ierr;
3163   PetscInt       i;
3164   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3165 
3166   PetscFunctionBegin;
3167   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3168   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3169   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3170   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3171   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3172   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3173 
3174   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3175   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3176   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3177   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3178   if (d_nnz) {
3179     for (i=0; i<B->rmap->n; i++) {
3180       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3181     }
3182   }
3183   if (o_nnz) {
3184     for (i=0; i<B->rmap->n; i++) {
3185       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3186     }
3187   }
3188   b = (Mat_MPIAIJ*)B->data;
3189 
3190   if (!B->preallocated) {
3191     /* Explicitly create 2 MATSEQAIJ matrices. */
3192     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3193     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3194     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3195     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3196     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3197     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3198     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3199     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3200   }
3201 
3202   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3203   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3204   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3205   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3206   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3207   B->preallocated = PETSC_TRUE;
3208   PetscFunctionReturn(0);
3209 }
3210 EXTERN_C_END
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3214 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3215 {
3216   Mat            mat;
3217   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   *newmat       = 0;
3222   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3223   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3224   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3225   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3226   a    = (Mat_MPIAIJ*)mat->data;
3227 
3228   mat->factortype    = matin->factortype;
3229   mat->rmap->bs      = matin->rmap->bs;
3230   mat->assembled    = PETSC_TRUE;
3231   mat->insertmode   = NOT_SET_VALUES;
3232   mat->preallocated = PETSC_TRUE;
3233 
3234   a->size           = oldmat->size;
3235   a->rank           = oldmat->rank;
3236   a->donotstash     = oldmat->donotstash;
3237   a->roworiented    = oldmat->roworiented;
3238   a->rowindices     = 0;
3239   a->rowvalues      = 0;
3240   a->getrowactive   = PETSC_FALSE;
3241 
3242   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3244 
3245   if (oldmat->colmap) {
3246 #if defined (PETSC_USE_CTABLE)
3247     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3248 #else
3249     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3251     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3252 #endif
3253   } else a->colmap = 0;
3254   if (oldmat->garray) {
3255     PetscInt len;
3256     len  = oldmat->B->cmap->n;
3257     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3258     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3259     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3260   } else a->garray = 0;
3261 
3262   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3263   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3264   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3266   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3268   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3270   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3271   *newmat = mat;
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 
3276 
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatLoad_MPIAIJ"
3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3280 {
3281   PetscScalar    *vals,*svals;
3282   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3283   PetscErrorCode ierr;
3284   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3285   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3286   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3287   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3288   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3289   int            fd;
3290 
3291   PetscFunctionBegin;
3292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3293   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3294   if (!rank) {
3295     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3296     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3297     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3298   }
3299 
3300   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3301 
3302   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3303   M = header[1]; N = header[2];
3304   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3305   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3306   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3307 
3308   /* If global sizes are set, check if they are consistent with that given in the file */
3309   if (sizesset) {
3310     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3311   }
3312   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3313   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3314 
3315   /* determine ownership of all rows */
3316   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317   else m = newMat->rmap->n; /* Set by user */
3318 
3319   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3320   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3321 
3322   /* First process needs enough room for process with most rows */
3323   if (!rank) {
3324     mmax       = rowners[1];
3325     for (i=2; i<=size; i++) {
3326       mmax = PetscMax(mmax,rowners[i]);
3327     }
3328   } else mmax = m;
3329 
3330   rowners[0] = 0;
3331   for (i=2; i<=size; i++) {
3332     rowners[i] += rowners[i-1];
3333   }
3334   rstart = rowners[rank];
3335   rend   = rowners[rank+1];
3336 
3337   /* distribute row lengths to all processors */
3338   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3339   if (!rank) {
3340     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3341     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3342     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3343     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3344     for (j=0; j<m; j++) {
3345       procsnz[0] += ourlens[j];
3346     }
3347     for (i=1; i<size; i++) {
3348       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3349       /* calculate the number of nonzeros on each processor */
3350       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351         procsnz[i] += rowlengths[j];
3352       }
3353       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3354     }
3355     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3356   } else {
3357     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3358   }
3359 
3360   if (!rank) {
3361     /* determine max buffer needed and allocate it */
3362     maxnz = 0;
3363     for (i=0; i<size; i++) {
3364       maxnz = PetscMax(maxnz,procsnz[i]);
3365     }
3366     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3367 
3368     /* read in my part of the matrix column indices  */
3369     nz   = procsnz[0];
3370     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3371     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3372 
3373     /* read in everyone else's part and ship it off */
3374     for (i=1; i<size; i++) {
3375       nz     = procsnz[i];
3376       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3377       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3378     }
3379     ierr = PetscFree(cols);CHKERRQ(ierr);
3380   } else {
3381     /* determine buffer space needed for message */
3382     nz = 0;
3383     for (i=0; i<m; i++) {
3384       nz += ourlens[i];
3385     }
3386     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3387 
3388     /* receive message of column indices */
3389     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3390   }
3391 
3392   /* determine column ownership if matrix is not square */
3393   if (N != M) {
3394     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3395     else n = newMat->cmap->n;
3396     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3397     cstart = cend - n;
3398   } else {
3399     cstart = rstart;
3400     cend   = rend;
3401     n      = cend - cstart;
3402   }
3403 
3404   /* loop over local rows, determining number of off diagonal entries */
3405   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3406   jj = 0;
3407   for (i=0; i<m; i++) {
3408     for (j=0; j<ourlens[i]; j++) {
3409       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410       jj++;
3411     }
3412   }
3413 
3414   for (i=0; i<m; i++) {
3415     ourlens[i] -= offlens[i];
3416   }
3417   if (!sizesset) {
3418     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3419   }
3420   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3421 
3422   for (i=0; i<m; i++) {
3423     ourlens[i] += offlens[i];
3424   }
3425 
3426   if (!rank) {
3427     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3428 
3429     /* read in my part of the matrix numerical values  */
3430     nz   = procsnz[0];
3431     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3432 
3433     /* insert into matrix */
3434     jj      = rstart;
3435     smycols = mycols;
3436     svals   = vals;
3437     for (i=0; i<m; i++) {
3438       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3439       smycols += ourlens[i];
3440       svals   += ourlens[i];
3441       jj++;
3442     }
3443 
3444     /* read in other processors and ship out */
3445     for (i=1; i<size; i++) {
3446       nz     = procsnz[i];
3447       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3448       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3449     }
3450     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3451   } else {
3452     /* receive numeric values */
3453     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3454 
3455     /* receive message of values */
3456     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3457 
3458     /* insert into matrix */
3459     jj      = rstart;
3460     smycols = mycols;
3461     svals   = vals;
3462     for (i=0; i<m; i++) {
3463       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3464       smycols += ourlens[i];
3465       svals   += ourlens[i];
3466       jj++;
3467     }
3468   }
3469   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3470   ierr = PetscFree(vals);CHKERRQ(ierr);
3471   ierr = PetscFree(mycols);CHKERRQ(ierr);
3472   ierr = PetscFree(rowners);CHKERRQ(ierr);
3473 
3474   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3475   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
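/*
   Usage sketch (illustrative only, not part of the library source): this
   routine is reached through the generic MatLoad() interface.  Reading back a
   matrix that was previously written with MatView() on a binary viewer looks
   like the following (the file name "matrix.dat" is arbitrary):

      Mat         A;
      PetscViewer fd;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,fd);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/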
3478 
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3482 {
3483   PetscErrorCode ierr;
3484   IS             iscol_local;
3485   PetscInt       csize;
3486 
3487   PetscFunctionBegin;
3488   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3489   if (call == MAT_REUSE_MATRIX) {
3490     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3491     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3492   } else {
3493     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3494   }
3495   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3496   if (call == MAT_INITIAL_MATRIX) {
3497     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3498     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3499   }
3500   PetscFunctionReturn(0);
3501 }
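/*
   Usage sketch (illustrative only, not part of the library source): callers
   reach the routine above through MatGetSubMatrix().  For example, extracting
   the square submatrix defined by each process's own rows (and the matching
   columns) of a matrix A living on PETSC_COMM_WORLD:

      IS       isrow,iscol;
      Mat      S;
      PetscInt rstart,rend;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/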
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3505 /*
3506     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3507   copy, and then the end result formed by concatenating the local matrices.
3508   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3509 
3510   Note: This requires a sequential iscol with all indices.
3511 */
3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3513 {
3514   PetscErrorCode ierr;
3515   PetscMPIInt    rank,size;
3516   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3517   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518   Mat            *local,M,Mreuse;
3519   MatScalar      *vwork,*aa;
3520   MPI_Comm       comm = ((PetscObject)mat)->comm;
3521   Mat_SeqAIJ     *aij;
3522 
3523 
3524   PetscFunctionBegin;
3525   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3527 
3528   if (call ==  MAT_REUSE_MATRIX) {
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3530     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531     local = &Mreuse;
3532     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3533   } else {
3534     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3535     Mreuse = *local;
3536     ierr   = PetscFree(local);CHKERRQ(ierr);
3537   }
3538 
3539   /*
3540       m - number of local rows
3541       n - number of columns (same on all processors)
3542       rstart - first row in new global matrix generated
3543   */
3544   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3545   if (call == MAT_INITIAL_MATRIX) {
3546     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3547     ii  = aij->i;
3548     jj  = aij->j;
3549 
3550     /*
3551         Determine the number of non-zeros in the diagonal and off-diagonal
3552         portions of the matrix in order to do correct preallocation
3553     */
3554 
3555     /* first get start and end of "diagonal" columns */
3556     if (csize == PETSC_DECIDE) {
3557       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3558       if (mglobal == n) { /* square matrix */
3559         nlocal = m;
3560       } else {
3561         nlocal = n/size + ((n % size) > rank);
3562       }
3563     } else {
3564       nlocal = csize;
3565     }
3566     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3567     rstart = rend - nlocal;
3568     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3569 
3570     /* next, compute all the lengths */
3571     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3572     olens = dlens + m;
3573     for (i=0; i<m; i++) {
3574       jend = ii[i+1] - ii[i];
3575       olen = 0;
3576       dlen = 0;
3577       for (j=0; j<jend; j++) {
3578         if (*jj < rstart || *jj >= rend) olen++;
3579         else dlen++;
3580         jj++;
3581       }
3582       olens[i] = olen;
3583       dlens[i] = dlen;
3584     }
3585     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3586     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3587     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3588     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3589     ierr = PetscFree(dlens);CHKERRQ(ierr);
3590   } else {
3591     PetscInt ml,nl;
3592 
3593     M = *newmat;
3594     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3595     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3596     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3597     /*
3598          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3599        rather than the slower MatSetValues().
3600     */
3601     M->was_assembled = PETSC_TRUE;
3602     M->assembled     = PETSC_FALSE;
3603   }
3604   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3605   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606   ii  = aij->i;
3607   jj  = aij->j;
3608   aa  = aij->a;
3609   for (i=0; i<m; i++) {
3610     row   = rstart + i;
3611     nz    = ii[i+1] - ii[i];
3612     cwork = jj;     jj += nz;
3613     vwork = aa;     aa += nz;
3614     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3615   }
3616 
3617   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   *newmat = M;
3620 
3621   /* save submatrix used in processor for next request */
3622   if (call ==  MAT_INITIAL_MATRIX) {
3623     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3624     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3625   }
3626 
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 EXTERN_C_BEGIN
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3633 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634 {
3635   PetscInt       m,cstart, cend,j,nnz,i,d;
3636   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637   const PetscInt *JJ;
3638   PetscScalar    *values;
3639   PetscErrorCode ierr;
3640 
3641   PetscFunctionBegin;
3642   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3643 
3644   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3648   m      = B->rmap->n;
3649   cstart = B->cmap->rstart;
3650   cend   = B->cmap->rend;
3651   rstart = B->rmap->rstart;
3652 
3653   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3654 
3655 #if defined(PETSC_USE_DEBUG)
3656   for (i=0; i<m; i++) {
3657     nnz     = Ii[i+1]- Ii[i];
3658     JJ      = J + Ii[i];
3659     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3660     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3661     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662   }
3663 #endif
3664 
3665   for (i=0; i<m; i++) {
3666     nnz     = Ii[i+1]- Ii[i];
3667     JJ      = J + Ii[i];
3668     nnz_max = PetscMax(nnz_max,nnz);
3669     d       = 0;
3670     for (j=0; j<nnz; j++) {
3671       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672     }
3673     d_nnz[i] = d;
3674     o_nnz[i] = nnz - d;
3675   }
3676   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3677   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3678 
3679   if (v) values = (PetscScalar*)v;
3680   else {
3681     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3682     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3683   }
3684 
3685   for (i=0; i<m; i++) {
3686     ii   = i + rstart;
3687     nnz  = Ii[i+1]- Ii[i];
3688     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3689   }
3690   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692 
3693   if (!v) {
3694     ierr = PetscFree(values);CHKERRQ(ierr);
3695   }
3696   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3697   PetscFunctionReturn(0);
3698 }
3699 EXTERN_C_END
3700 
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3703 /*@
3704    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3705    (the default parallel PETSc format).
3706 
3707    Collective on MPI_Comm
3708 
3709    Input Parameters:
3710 +  B - the matrix
3711 .  i - the indices into j for the start of each local row (starts with zero)
3712 .  j - the column indices for each local row (starts with zero)
3713 -  v - optional values in the matrix
3714 
3715    Level: developer
3716 
3717    Notes:
3718        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3719      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3720      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3721 
3722        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3723 
3724        The format used for the sparse matrix input is equivalent to a
3725     row-major ordering, i.e. for the following matrix, the input data expected is
3726     as shown:
3727 
3728         1 0 0
3729         2 0 3     P0
3730        -------
3731         4 5 6     P1
3732 
3733      Process0 [P0]: rows_owned=[0,1]
3734         i =  {0,1,3}  [size = nrow+1  = 2+1]
3735         j =  {0,0,2}  [size = nz = 6]
3736         v =  {1,2,3}  [size = nz = 6]
3737 
3738      Process1 [P1]: rows_owned=[2]
3739         i =  {0,3}    [size = nrow+1  = 1+1]
3740         j =  {0,1,2}  [size = nz = 6]
3741         v =  {4,5,6}  [size = nz = 6]
3742 
3743 .keywords: matrix, aij, compressed row, sparse, parallel
3744 
3745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3746           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3747 @*/
3748 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3749 {
3750   PetscErrorCode ierr;
3751 
3752   PetscFunctionBegin;
3753   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3754   PetscFunctionReturn(0);
3755 }
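/*
   Usage sketch (illustrative only, not part of the library source): the
   two-process example from the manual page above, written out.  Each process
   passes only the CSR description of the rows it owns; rank is the result of
   MPI_Comm_rank() on PETSC_COMM_WORLD:

      Mat         B;
      PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
      PetscScalar v0[] = {1,2,3};
      PetscInt    i1[] = {0,3},j1[] = {0,1,2};
      PetscScalar v1[] = {4,5,6};

      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
      else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
*/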
3756 
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3759 /*@C
3760    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3761    (the default parallel PETSc format).  For good matrix assembly performance
3762    the user should preallocate the matrix storage by setting the parameters
3763    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3764    performance can be increased by more than a factor of 50.
3765 
3766    Collective on MPI_Comm
3767 
3768    Input Parameters:
3769 +  A - the matrix
3770 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3771            (same value is used for all local rows)
3772 .  d_nnz - array containing the number of nonzeros in the various rows of the
3773            DIAGONAL portion of the local submatrix (possibly different for each row)
3774            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3775            The size of this array is equal to the number of local rows, i.e 'm'.
3776            For matrices that will be factored, you must leave room for (and set)
3777            the diagonal entry even if it is zero.
3778 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3779            submatrix (same value is used for all local rows).
3780 -  o_nnz - array containing the number of nonzeros in the various rows of the
3781            OFF-DIAGONAL portion of the local submatrix (possibly different for
3782            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3783            structure. The size of this array is equal to the number
3784            of local rows, i.e 'm'.
3785 
3786    If the *_nnz parameter is given then the *_nz parameter is ignored
3787 
3788    The AIJ format (also called the Yale sparse matrix format or
3789    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3790    storage.  The stored row and column indices begin with zero.
3791    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3792 
3793    The parallel matrix is partitioned such that the first m0 rows belong to
3794    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3795    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3796 
3797    The DIAGONAL portion of the local submatrix of a processor can be defined
3798    as the submatrix which is obtained by extracting the part corresponding to
3799    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3800    first row that belongs to the processor, r2 is the last row belonging to
3801    this processor, and c1-c2 is the range of indices of the local part of a
3802    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3803    common case of a square matrix, the row and column ranges are the same and
3804    the DIAGONAL part is also square. The remaining portion of the local
3805    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3806 
3807    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3808 
3809    You can call MatGetInfo() to get information on how effective the preallocation was;
3810    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3811    You can also run with the option -info and look for messages with the string
3812    malloc in them to see if additional memory allocation was needed.
3813 
3814    Example usage:
3815 
3816    Consider the following 8x8 matrix with 34 non-zero values, that is
3817    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3818    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3819    as follows:
3820 
3821 .vb
3822             1  2  0  |  0  3  0  |  0  4
3823     Proc0   0  5  6  |  7  0  0  |  8  0
3824             9  0 10  | 11  0  0  | 12  0
3825     -------------------------------------
3826            13  0 14  | 15 16 17  |  0  0
3827     Proc1   0 18  0  | 19 20 21  |  0  0
3828             0  0  0  | 22 23  0  | 24  0
3829     -------------------------------------
3830     Proc2  25 26 27  |  0  0 28  | 29  0
3831            30  0  0  | 31 32 33  |  0 34
3832 .ve
3833 
3834    This can be represented as a collection of submatrices as:
3835 
3836 .vb
3837       A B C
3838       D E F
3839       G H I
3840 .ve
3841 
3842    Where the submatrices A,B,C are owned by proc0, D,E,F are
3843    owned by proc1, G,H,I are owned by proc2.
3844 
3845    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3846    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3847    The 'M','N' parameters are 8,8, and have the same values on all procs.
3848 
3849    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3850    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3851    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3852    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3853    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3854    matrix, ans [DF] as another SeqAIJ matrix.
3855 
3856    When d_nz, o_nz parameters are specified, d_nz storage elements are
3857    allocated for every row of the local diagonal submatrix, and o_nz
3858    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3859    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3860    local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3861    In this case, the values of d_nz,o_nz are:
3862 .vb
3863      proc0 : dnz = 2, o_nz = 2
3864      proc1 : dnz = 3, o_nz = 2
3865      proc2 : dnz = 1, o_nz = 4
3866 .ve
3867    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3868    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3869    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3870    34 values.
3871 
3872    When d_nnz, o_nnz parameters are specified, the storage is specified
3873    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3874    In the above case the values for d_nnz,o_nnz are:
3875 .vb
3876      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3877      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3878      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3879 .ve
3880    Here the space allocated is the sum of all the above values, i.e. 34, and
3881    hence the preallocation is exact.
3882 
3883    Level: intermediate
3884 
3885 .keywords: matrix, aij, compressed row, sparse, parallel
3886 
3887 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3888           MPIAIJ, MatGetInfo()
3889 @*/
3890 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3891 {
3892   PetscErrorCode ierr;
3893 
3894   PetscFunctionBegin;
3895   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3896   PetscValidType(B,1);
3897   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
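/*
   Usage sketch (illustrative only, not part of the library source):
   preallocating the 8x8 example from the manual page above with exact per-row
   counts, as seen from process 0 (which owns rows 0-2); processes 1 and 2
   would pass their own d_nnz/o_nnz arrays from the table above:

      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/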
3900 
3901 #undef __FUNCT__
3902 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3903 /*@
3904      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3905          the local rows in standard CSR format.
3906 
3907    Collective on MPI_Comm
3908 
3909    Input Parameters:
3910 +  comm - MPI communicator
3911 .  m - number of local rows (Cannot be PETSC_DECIDE)
3912 .  n - This value should be the same as the local size used in creating the
3913        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3914        calculated if N is given) For square matrices n is almost always m.
3915 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3916 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3917 .   i - row indices
3918 .   j - column indices
3919 -   a - matrix values
3920 
3921    Output Parameter:
3922 .   mat - the matrix
3923 
3924    Level: intermediate
3925 
3926    Notes:
3927        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3928      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3929      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3930 
3931        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3932 
3933        The format used for the sparse matrix input is equivalent to a
3934     row-major ordering, i.e. for the following matrix, the input data expected is
3935     as shown:
3936 
3937         1 0 0
3938         2 0 3     P0
3939        -------
3940         4 5 6     P1
3941 
3942      Process0 [P0]: rows_owned=[0,1]
3943         i =  {0,1,3}  [size = nrow+1  = 2+1]
3944         j =  {0,0,2}  [size = nz = 3]
3945         v =  {1,2,3}  [size = nz = 3]
3946 
3947      Process1 [P1]: rows_owned=[2]
3948         i =  {0,3}    [size = nrow+1  = 1+1]
3949         j =  {0,1,2}  [size = nz = 3]
3950         v =  {4,5,6}  [size = nz = 3]
3951 
3952 .keywords: matrix, aij, compressed row, sparse, parallel
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3955           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3956 @*/
3957 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3958 {
3959   PetscErrorCode ierr;
3960 
3961   PetscFunctionBegin;
3962   if (i[0]) {
3963     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3964   }
3965   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3966   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3967   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3968   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3969   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
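/*
   Usage sketch (illustrative only, not part of the library source): the same
   two-process example as in the manual page above, created in one call.  On
   process 0 (rows 0-1):

      Mat         A;
      PetscInt    ii[] = {0,1,3},jj[] = {0,0,2};
      PetscScalar vv[] = {1,2,3};

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,ii,jj,vv,&A);CHKERRQ(ierr);

   Process 1 passes m = 1 with its own ii,jj,vv arrays ({0,3},{0,1,2},{4,5,6}).
*/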
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatCreateMPIAIJ"
3975 /*@C
3976    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3977    (the default parallel PETSc format).  For good matrix assembly performance
3978    the user should preallocate the matrix storage by setting the parameters
3979    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3980    performance can be increased by more than a factor of 50.
3981 
3982    Collective on MPI_Comm
3983 
3984    Input Parameters:
3985 +  comm - MPI communicator
3986 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3987            This value should be the same as the local size used in creating the
3988            y vector for the matrix-vector product y = Ax.
3989 .  n - This value should be the same as the local size used in creating the
3990        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3991        calculated if N is given) For square matrices n is almost always m.
3992 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3993 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3994 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3995            (same value is used for all local rows)
3996 .  d_nnz - array containing the number of nonzeros in the various rows of the
3997            DIAGONAL portion of the local submatrix (possibly different for each row)
3998            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3999            The size of this array is equal to the number of local rows, i.e 'm'.
4000 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4001            submatrix (same value is used for all local rows).
4002 -  o_nnz - array containing the number of nonzeros in the various rows of the
4003            OFF-DIAGONAL portion of the local submatrix (possibly different for
4004            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4005            structure. The size of this array is equal to the number
4006            of local rows, i.e., 'm'.
4007 
4008    Output Parameter:
4009 .  A - the matrix
4010 
4011    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4012    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4013    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
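
        For an MPIAIJ matrix, that recommended sequence is, as a sketch:
     .vb
        ierr = MatCreate(comm,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
     .ve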
4014 
4015    Notes:
4016    If the *_nnz parameter is given then the *_nz parameter is ignored
4017 
4018    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4019    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4020    storage requirements for this matrix.
4021 
4022    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4023    processor then it must be used on all processors that share the object for
4024    that argument.
4025 
4026    The user MUST specify either the local or global matrix dimensions
4027    (possibly both).
4028 
4029    The parallel matrix is partitioned across processors such that the
4030    first m0 rows belong to process 0, the next m1 rows belong to
4031    process 1, the next m2 rows belong to process 2, etc., where
4032    m0,m1,m2,... are the input parameter 'm' on each process, i.e., each processor
4033    stores values corresponding to an [m x N] submatrix.
4034 
4035    The columns are logically partitioned with the n0 columns belonging
4036    to the 0th partition, the next n1 columns belonging to the next
4037    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4038 
4039    The DIAGONAL portion of the local submatrix on any given processor
4040    is the submatrix corresponding to the rows and columns m,n
4041    corresponding to the given processor, i.e., the diagonal submatrix on
4042    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4043    etc. The remaining portion of the local submatrix, [m x (N-n)],
4044    constitutes the OFF-DIAGONAL portion. The example below
4045    illustrates this concept.
4046 
4047    For a square global matrix we define each processor's diagonal portion
4048    to be its local rows and the corresponding columns (a square submatrix);
4049    each processor's off-diagonal portion encompasses the remainder of the
4050    local matrix (a rectangular submatrix).
4051 
4052    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
4053 
4054    When calling this routine with a single process communicator, a matrix of
4055    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4056    type of communicator, use the construction mechanism:
4057      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4058 
4059    By default, this format uses inodes (identical nodes) when possible.
4060    We search for consecutive rows with the same nonzero structure, thereby
4061    reusing matrix information to achieve increased efficiency.
4062 
4063    Options Database Keys:
4064 +  -mat_no_inode  - Do not use inodes
4065 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4066 -  -mat_aij_oneindex - Internally use indexing starting at 1
4067         rather than 0.  Note that when calling MatSetValues(),
4068         the user still MUST index entries starting at 0!
4069 
4070 
4071    Example usage:
4072 
4073    Consider the following 8x8 matrix with 34 non-zero values, that is
4074    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4075    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4076    as follows:
4077 
4078 .vb
4079             1  2  0  |  0  3  0  |  0  4
4080     Proc0   0  5  6  |  7  0  0  |  8  0
4081             9  0 10  | 11  0  0  | 12  0
4082     -------------------------------------
4083            13  0 14  | 15 16 17  |  0  0
4084     Proc1   0 18  0  | 19 20 21  |  0  0
4085             0  0  0  | 22 23  0  | 24  0
4086     -------------------------------------
4087     Proc2  25 26 27  |  0  0 28  | 29  0
4088            30  0  0  | 31 32 33  |  0 34
4089 .ve
4090 
4091    This can be represented as a collection of submatrices as:
4092 
4093 .vb
4094       A B C
4095       D E F
4096       G H I
4097 .ve
4098 
4099    Where the submatrices A,B,C are owned by proc0, D,E,F are
4100    owned by proc1, G,H,I are owned by proc2.
4101 
4102    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104    The 'M','N' parameters are 8,8, and have the same values on all procs.
4105 
4106    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4107    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4108    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4109    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4110    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4111    matrix, and [DF] as another SeqAIJ matrix.
4112 
4113    When d_nz, o_nz parameters are specified, d_nz storage elements are
4114    allocated for every row of the local diagonal submatrix, and o_nz
4115    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4116    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4117    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4118    In this case, the values of d_nz,o_nz are:
4119 .vb
4120      proc0 : d_nz = 2, o_nz = 2
4121      proc1 : d_nz = 3, o_nz = 2
4122      proc2 : d_nz = 1, o_nz = 4
4123 .ve
4124    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4125    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4126    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4127    34 values.
4128 
4129    When d_nnz, o_nnz parameters are specified, the storage is specified
4130    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4131    In the above case the values for d_nnz,o_nnz are:
4132 .vb
4133      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4134      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4135      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4136 .ve
4137    Here the space allocated is the sum of all the above values, i.e., 34, and
4138    hence the pre-allocation is perfect.
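
        A call on proc0, using the d_nnz,o_nnz values above, would then be (a sketch):
     .vb
        PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
        ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     .ve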
4139 
4140    Level: intermediate
4141 
4142 .keywords: matrix, aij, compressed row, sparse, parallel
4143 
4144 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4145           MPIAIJ, MatCreateMPIAIJWithArrays()
4146 @*/
4147 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4148 {
4149   PetscErrorCode ierr;
4150   PetscMPIInt    size;
4151 
4152   PetscFunctionBegin;
4153   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4154   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4156   if (size > 1) {
4157     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4158     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4159   } else {
4160     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4161     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4162   }
4163   PetscFunctionReturn(0);
4164 }
4165 
4166 #undef __FUNCT__
4167 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4168 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4169 {
4170   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4171 
4172   PetscFunctionBegin;
4173   *Ad     = a->A;
4174   *Ao     = a->B;
4175   *colmap = a->garray;
4176   PetscFunctionReturn(0);
4177 }
4178 
4179 #undef __FUNCT__
4180 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4181 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4182 {
4183   PetscErrorCode ierr;
4184   PetscInt       i;
4185   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4186 
4187   PetscFunctionBegin;
4188   if (coloring->ctype == IS_COLORING_GLOBAL) {
4189     ISColoringValue *allcolors,*colors;
4190     ISColoring      ocoloring;
4191 
4192     /* set coloring for diagonal portion */
4193     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4194 
4195     /* set coloring for off-diagonal portion */
4196     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4197     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4198     for (i=0; i<a->B->cmap->n; i++) {
4199       colors[i] = allcolors[a->garray[i]];
4200     }
4201     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4202     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4203     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4204     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4205   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4206     ISColoringValue *colors;
4207     PetscInt        *larray;
4208     ISColoring      ocoloring;
4209 
4210     /* set coloring for diagonal portion */
4211     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4212     for (i=0; i<a->A->cmap->n; i++) {
4213       larray[i] = i + A->cmap->rstart;
4214     }
4215     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4216     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4217     for (i=0; i<a->A->cmap->n; i++) {
4218       colors[i] = coloring->colors[larray[i]];
4219     }
4220     ierr = PetscFree(larray);CHKERRQ(ierr);
4221     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4222     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4223     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4224 
4225     /* set coloring for off-diagonal portion */
4226     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4227     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4228     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4229     for (i=0; i<a->B->cmap->n; i++) {
4230       colors[i] = coloring->colors[larray[i]];
4231     }
4232     ierr = PetscFree(larray);CHKERRQ(ierr);
4233     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4234     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4235     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4236   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4237 
4238   PetscFunctionReturn(0);
4239 }
4240 
4241 #if defined(PETSC_HAVE_ADIC)
4242 #undef __FUNCT__
4243 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4244 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4245 {
4246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4247   PetscErrorCode ierr;
4248 
4249   PetscFunctionBegin;
4250   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4251   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4252   PetscFunctionReturn(0);
4253 }
4254 #endif
4255 
4256 #undef __FUNCT__
4257 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4258 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4259 {
4260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4261   PetscErrorCode ierr;
4262 
4263   PetscFunctionBegin;
4264   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4265   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4266   PetscFunctionReturn(0);
4267 }
4268 
4269 #undef __FUNCT__
4270 #define __FUNCT__ "MatMergeSymbolic"
4271 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4272 {
4273   PetscErrorCode ierr;
4274   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4275   PetscInt       *indx;
4276 
4277   PetscFunctionBegin;
4278   /* This routine will ONLY return MPIAIJ type matrix */
4279   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4280   if (n == PETSC_DECIDE){
4281     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4282   }
4283   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4284   rstart -= m;
4285 
4286   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4287   for (i=0;i<m;i++) {
4288     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4289     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4290     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4291   }
4292 
4293   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4294   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4295   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4296   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4297   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4298   PetscFunctionReturn(0);
4299 }
4300 
4301 #undef __FUNCT__
4302 #define __FUNCT__ "MatMergeNumeric"
4303 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4304 {
4305   PetscErrorCode ierr;
4306   PetscInt       m,N,i,rstart,nnz,Ii;
4307   PetscInt       *indx;
4308   PetscScalar    *values;
4309 
4310   PetscFunctionBegin;
4311   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4312   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4313   for (i=0;i<m;i++) {
4314     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4315     Ii    = i + rstart;
4316     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4317     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4318   }
4319   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4320   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4321   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4322   PetscFunctionReturn(0);
4323 }
4324 
4325 #undef __FUNCT__
4326 #define __FUNCT__ "MatMerge"
4327 /*@
4328       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
4329                  matrices from each processor
4330 
4331     Collective on MPI_Comm
4332 
4333    Input Parameters:
4334 +    comm - the communicator the parallel matrix will live on
4335 .    inmat - the input sequential matrix (one per process)
4336 .    n - number of local columns (or PETSC_DECIDE)
4337 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4338 
4339    Output Parameter:
4340 .    outmat - the parallel matrix generated
4341 
4342     Level: advanced
4343 
4344    Notes: The number of columns of the matrix on EACH processor MUST be the same.
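
        Example usage (a sketch; note that inmat is destroyed during the merge, see MatMergeNumeric() above):
     .vb
        Mat seqmat,parmat;
        /* ... each process assembles its own SeqAIJ matrix seqmat ... */
        ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);CHKERRQ(ierr);
     .ve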
4345 
4346 @*/
4347 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4348 {
4349   PetscErrorCode ierr;
4350 
4351   PetscFunctionBegin;
4352   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4353   if (scall == MAT_INITIAL_MATRIX){
4354     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4355   }
4356   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4357   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 #undef __FUNCT__
4362 #define __FUNCT__ "MatFileSplit"
4363 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4364 {
4365   PetscErrorCode    ierr;
4366   PetscMPIInt       rank;
4367   PetscInt          m,N,i,rstart,nnz;
4368   size_t            len;
4369   const PetscInt    *indx;
4370   PetscViewer       out;
4371   char              *name;
4372   Mat               B;
4373   const PetscScalar *values;
4374 
4375   PetscFunctionBegin;
4376   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4377   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4378   /* Should this be the type of the diagonal block of A? */
4379   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4380   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4381   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4382   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4383   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4384   for (i=0;i<m;i++) {
4385     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4386     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4387     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4388   }
4389   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4390   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4391 
4392   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4393   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4394   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4395   sprintf(name,"%s.%d",outfile,rank);
4396   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4397   ierr = PetscFree(name);CHKERRQ(ierr);
4398   ierr = MatView(B,out);CHKERRQ(ierr);
4399   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4400   ierr = MatDestroy(&B);CHKERRQ(ierr);
4401   PetscFunctionReturn(0);
4402 }
4403 
4404 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4405 #undef __FUNCT__
4406 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4407 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4408 {
4409   PetscErrorCode       ierr;
4410   Mat_Merge_SeqsToMPI  *merge;
4411   PetscContainer       container;
4412 
4413   PetscFunctionBegin;
4414   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4415   if (container) {
4416     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4429     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4430     ierr = PetscFree(merge);CHKERRQ(ierr);
4431     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4432   }
4433   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #include <../src/mat/utils/freespace.h>
4438 #include <petscbt.h>
4439 
4440 #undef __FUNCT__
4441 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4442 /*@C
4443       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4444                  matrices from each processor
4445 
4446     Collective on MPI_Comm
4447 
4448    Input Parameters:
4449 +    comm - the communicator the parallel matrix will live on
4450 .    seqmat - the input sequential matrix (one per process)
4451 .    m - number of local rows (or PETSC_DECIDE)
4452 .    n - number of local columns (or PETSC_DECIDE)
4453 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4454 
4455    Output Parameter:
4456 .    mpimat - the parallel matrix generated
4457 
4458     Level: advanced
4459 
4460    Notes:
4461      The dimensions of the sequential matrix on each processor MUST be the same.
4462      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4463      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
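
        Example usage (a sketch of the symbolic/numeric reuse pattern driven by MatMerge_SeqsToMPI()):
     .vb
        ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
        /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
        ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     .ve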
4464 @*/
4465 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4466 {
4467   PetscErrorCode       ierr;
4468   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4469   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4470   PetscMPIInt          size,rank,taga,*len_s;
4471   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4472   PetscInt             proc,m;
4473   PetscInt             **buf_ri,**buf_rj;
4474   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4475   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4476   MPI_Request          *s_waits,*r_waits;
4477   MPI_Status           *status;
4478   MatScalar            *aa=a->a;
4479   MatScalar            **abuf_r,*ba_i;
4480   Mat_Merge_SeqsToMPI  *merge;
4481   PetscContainer       container;
4482 
4483   PetscFunctionBegin;
4484   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4485 
4486   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4487   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4488 
4489   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4490   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4491 
4492   bi     = merge->bi;
4493   bj     = merge->bj;
4494   buf_ri = merge->buf_ri;
4495   buf_rj = merge->buf_rj;
4496 
4497   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4498   owners = merge->rowmap->range;
4499   len_s  = merge->len_s;
4500 
4501   /* send and recv matrix values */
4502   /*-----------------------------*/
4503   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4504   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4505 
4506   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4507   for (proc=0,k=0; proc<size; proc++){
4508     if (!len_s[proc]) continue;
4509     i = owners[proc];
4510     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4511     k++;
4512   }
4513 
4514   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4515   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4516   ierr = PetscFree(status);CHKERRQ(ierr);
4517 
4518   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4519   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4520 
4521   /* insert mat values of mpimat */
4522   /*----------------------------*/
4523   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4524   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4525 
4526   for (k=0; k<merge->nrecv; k++){
4527     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4528     nrows = *(buf_ri_k[k]);
4529     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4530     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4531   }
4532 
4533   /* set values of ba */
4534   m = merge->rowmap->n;
4535   for (i=0; i<m; i++) {
4536     arow = owners[rank] + i;
4537     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4538     bnzi = bi[i+1] - bi[i];
4539     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4540 
4541     /* add local non-zero vals of this proc's seqmat into ba */
4542     anzi = ai[arow+1] - ai[arow];
4543     aj   = a->j + ai[arow];
4544     aa   = a->a + ai[arow];
4545     nextaj = 0;
4546     for (j=0; nextaj<anzi; j++){
4547       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4548         ba_i[j] += aa[nextaj++];
4549       }
4550     }
4551 
4552     /* add received vals into ba */
4553     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4554       /* i-th row */
4555       if (i == *nextrow[k]) {
4556         anzi = *(nextai[k]+1) - *nextai[k];
4557         aj   = buf_rj[k] + *(nextai[k]);
4558         aa   = abuf_r[k] + *(nextai[k]);
4559         nextaj = 0;
4560         for (j=0; nextaj<anzi; j++){
4561           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4562             ba_i[j] += aa[nextaj++];
4563           }
4564         }
4565         nextrow[k]++; nextai[k]++;
4566       }
4567     }
4568     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4569   }
4570   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4571   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4572 
4573   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4574   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4575   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4576   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4577   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4578   PetscFunctionReturn(0);
4579 }
4580 
4581 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4582 
4583 #undef __FUNCT__
4584 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4585 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4586 {
4587   PetscErrorCode       ierr;
4588   Mat                  B_mpi;
4589   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4590   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4591   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4592   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4593   PetscInt             len,proc,*dnz,*onz;
4594   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4595   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4596   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4597   MPI_Status           *status;
4598   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4599   PetscBT              lnkbt;
4600   Mat_Merge_SeqsToMPI  *merge;
4601   PetscContainer       container;
4602 
4603   PetscFunctionBegin;
4604   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4605 
4606   /* make sure it is a PETSc comm */
4607   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4608   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4609   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4610 
4611   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4612   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4613 
4614   /* determine row ownership */
4615   /*---------------------------------------------------------*/
4616   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4617   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4618   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4619   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4620   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4621   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4622   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4623 
4624   m      = merge->rowmap->n;
4625   M      = merge->rowmap->N;
4626   owners = merge->rowmap->range;
4627 
4628   /* determine the number of messages to send, their lengths */
4629   /*---------------------------------------------------------*/
4630   len_s  = merge->len_s;
4631 
4632   len = 0;  /* length of buf_si[] */
4633   merge->nsend = 0;
4634   for (proc=0; proc<size; proc++){
4635     len_si[proc] = 0;
4636     if (proc == rank){
4637       len_s[proc] = 0;
4638     } else {
4639       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4640       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4641     }
4642     if (len_s[proc]) {
4643       merge->nsend++;
4644       nrows = 0;
4645       for (i=owners[proc]; i<owners[proc+1]; i++){
4646         if (ai[i+1] > ai[i]) nrows++;
4647       }
4648       len_si[proc] = 2*(nrows+1);
4649       len += len_si[proc];
4650     }
4651   }
4652 
4653   /* determine the number and length of messages to receive for ij-structure */
4654   /*-------------------------------------------------------------------------*/
4655   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4656   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4657 
4658   /* post the Irecv of j-structure */
4659   /*-------------------------------*/
4660   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4661   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4662 
4663   /* post the Isend of j-structure */
4664   /*--------------------------------*/
4665   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4666 
4667   for (proc=0, k=0; proc<size; proc++){
4668     if (!len_s[proc]) continue;
4669     i = owners[proc];
4670     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4671     k++;
4672   }
4673 
4674   /* receives and sends of j-structure are complete */
4675   /*------------------------------------------------*/
4676   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4677   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4678 
4679   /* send and recv i-structure */
4680   /*---------------------------*/
4681   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4682   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4683 
4684   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4685   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4686   for (proc=0,k=0; proc<size; proc++){
4687     if (!len_s[proc]) continue;
4688     /* form outgoing message for i-structure:
4689          buf_si[0]:                 nrows to be sent
4690                [1:nrows]:           row index (global)
4691                [nrows+1:2*nrows+1]: i-structure index
4692     */
4693     /*-------------------------------------------*/
4694     nrows = len_si[proc]/2 - 1;
4695     buf_si_i    = buf_si + nrows+1;
4696     buf_si[0]   = nrows;
4697     buf_si_i[0] = 0;
4698     nrows = 0;
4699     for (i=owners[proc]; i<owners[proc+1]; i++){
4700       anzi = ai[i+1] - ai[i];
4701       if (anzi) {
4702         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4703         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4704         nrows++;
4705       }
4706     }
4707     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4708     k++;
4709     buf_si += len_si[proc];
4710   }
4711 
4712   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4713   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4714 
4715   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4716   for (i=0; i<merge->nrecv; i++){
4717     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4718   }
4719 
4720   ierr = PetscFree(len_si);CHKERRQ(ierr);
4721   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4722   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4723   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4724   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4725   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4726   ierr = PetscFree(status);CHKERRQ(ierr);
4727 
4728   /* compute a local seq matrix in each processor */
4729   /*----------------------------------------------*/
4730   /* allocate bi array and free space for accumulating nonzero column info */
4731   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4732   bi[0] = 0;
4733 
4734   /* create and initialize a linked list */
4735   nlnk = N+1;
4736   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4737 
4738   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4740   len  = ai[owners[rank+1]] - ai[owners[rank]];
4741   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4742   current_space = free_space;
4743 
4744   /* determine symbolic info for each local row */
4745   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4746 
4747   for (k=0; k<merge->nrecv; k++){
4748     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4749     nrows = *buf_ri_k[k];
4750     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4751     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4752   }
4753 
4754   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4755   len = 0;
4756   for (i=0;i<m;i++) {
4757     bnzi   = 0;
4758     /* add local non-zero cols of this proc's seqmat into lnk */
4759     arow   = owners[rank] + i;
4760     anzi   = ai[arow+1] - ai[arow];
4761     aj     = a->j + ai[arow];
4762     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4763     bnzi += nlnk;
4764     /* add received col data into lnk */
4765     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4766       if (i == *nextrow[k]) { /* i-th row */
4767         anzi = *(nextai[k]+1) - *nextai[k];
4768         aj   = buf_rj[k] + *nextai[k];
4769         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4770         bnzi += nlnk;
4771         nextrow[k]++; nextai[k]++;
4772       }
4773     }
4774     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4775 
4776     /* if free space is not available, make more free space */
4777     if (current_space->local_remaining<bnzi) {
4778       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4779       nspacedouble++;
4780     }
4781     /* copy data into free space, then initialize lnk */
4782     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4783     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4784 
4785     current_space->array           += bnzi;
4786     current_space->local_used      += bnzi;
4787     current_space->local_remaining -= bnzi;
4788 
4789     bi[i+1] = bi[i] + bnzi;
4790   }
4791 
4792   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4793 
4794   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4795   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4796   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4797 
4798   /* create symbolic parallel matrix B_mpi */
4799   /*---------------------------------------*/
4800   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4801   if (n==PETSC_DECIDE) {
4802     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4803   } else {
4804     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4805   }
4806   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4807   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4808   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4809 
4810   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4811   B_mpi->assembled     = PETSC_FALSE;
4812   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4813   merge->bi            = bi;
4814   merge->bj            = bj;
4815   merge->buf_ri        = buf_ri;
4816   merge->buf_rj        = buf_rj;
4817   merge->coi           = PETSC_NULL;
4818   merge->coj           = PETSC_NULL;
4819   merge->owners_co     = PETSC_NULL;
4820 
4821   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4822 
4823   /* attach the supporting struct to B_mpi for reuse */
4824   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4825   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4826   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4827   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4828   *mpimat = B_mpi;
4829 
4830   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4831   PetscFunctionReturn(0);
4832 }
4833 
4834 #undef __FUNCT__
4835 #define __FUNCT__ "MatMerge_SeqsToMPI"
4836 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4837 {
4838   PetscErrorCode   ierr;
4839 
4840   PetscFunctionBegin;
4841   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4842   if (scall == MAT_INITIAL_MATRIX){
4843     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4844   }
4845   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4846   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4847   PetscFunctionReturn(0);
4848 }
4849 
4850 #undef __FUNCT__
4851 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4852 /*@
4853      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4854           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4855           with MatGetSize()
4856 
4857     Not Collective
4858 
4859    Input Parameters:
4860 +    A - the matrix
4861 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4862 
4863    Output Parameter:
4864 .    A_loc - the local sequential matrix generated
4865 
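        Example usage (a sketch):
     .vb
        Mat A_loc;
        ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
        /* ... use the local rows of A as a sequential matrix ... */
        ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
     .ve
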
4866     Level: developer
4867 
4868 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4869 
4870 @*/
4871 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4872 {
4873   PetscErrorCode  ierr;
4874   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4875   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4876   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4877   MatScalar       *aa=a->a,*ba=b->a,*cam;
4878   PetscScalar     *ca;
4879   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4880   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4881   PetscBool       match;
4882 
4883   PetscFunctionBegin;
4884   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4885   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4886   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4887   if (scall == MAT_INITIAL_MATRIX){
4888     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4889     ci[0] = 0;
4890     for (i=0; i<am; i++){
4891       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4892     }
4893     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4894     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4895     k = 0;
4896     for (i=0; i<am; i++) {
4897       ncols_o = bi[i+1] - bi[i];
4898       ncols_d = ai[i+1] - ai[i];
4899       /* off-diagonal portion of A */
4900       for (jo=0; jo<ncols_o; jo++) {
4901         col = cmap[*bj];
4902         if (col >= cstart) break;
4903         cj[k]   = col; bj++;
4904         ca[k++] = *ba++;
4905       }
4906       /* diagonal portion of A */
4907       for (j=0; j<ncols_d; j++) {
4908         cj[k]   = cstart + *aj++;
4909         ca[k++] = *aa++;
4910       }
4911       /* off-diagonal portion of A */
4912       for (j=jo; j<ncols_o; j++) {
4913         cj[k]   = cmap[*bj++];
4914         ca[k++] = *ba++;
4915       }
4916     }
4917     /* put together the new matrix */
4918     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4919     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4920     /* Since these are PETSc arrays, change flags to free them as necessary. */
4921     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4922     mat->free_a  = PETSC_TRUE;
4923     mat->free_ij = PETSC_TRUE;
4924     mat->nonew   = 0;
4925   } else if (scall == MAT_REUSE_MATRIX){
4926     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4927     ci = mat->i; cj = mat->j; cam = mat->a;
4928     for (i=0; i<am; i++) {
4929       /* off-diagonal portion of A: columns left of the diagonal block */
4930       ncols_o = bi[i+1] - bi[i];
4931       for (jo=0; jo<ncols_o; jo++) {
4932         col = cmap[*bj];
4933         if (col >= cstart) break;
4934         *cam++ = *ba++; bj++;
4935       }
4936       /* diagonal portion of A */
4937       ncols_d = ai[i+1] - ai[i];
4938       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4939       /* off-diagonal portion of A: columns right of the diagonal block */
4940       for (j=jo; j<ncols_o; j++) {
4941         *cam++ = *ba++; bj++;
4942       }
4943     }
4944   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4945   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4946   PetscFunctionReturn(0);
4947 }
4948 
4949 #undef __FUNCT__
4950 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4951 /*@C
4952      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4953 
4954     Not Collective
4955 
4956    Input Parameters:
4957 +    A - the matrix
4958 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4959 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4960 
4961    Output Parameter:
4962 .    A_loc - the local sequential matrix generated
4963 
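        Example usage (a sketch, extracting all local rows and all nonzero columns):
     .vb
        Mat A_loc;
        ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     .ve
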
4964     Level: developer
4965 
4966 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4967 
4968 @*/
4969 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4970 {
4971   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4972   PetscErrorCode    ierr;
4973   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4974   IS                isrowa,iscola;
4975   Mat               *aloc;
4976   PetscBool       match;
4977 
4978   PetscFunctionBegin;
4979   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4980   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4981   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4982   if (!row){
4983     start = A->rmap->rstart; end = A->rmap->rend;
4984     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4985   } else {
4986     isrowa = *row;
4987   }
4988   if (!col){
4989     start = A->cmap->rstart;
4990     cmap  = a->garray;
4991     nzA   = a->A->cmap->n;
4992     nzB   = a->B->cmap->n;
4993     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4994     ncols = 0;
4995     for (i=0; i<nzB; i++) {
4996       if (cmap[i] < start) idx[ncols++] = cmap[i];
4997       else break;
4998     }
4999     imark = i;
5000     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5001     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5002     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5003   } else {
5004     iscola = *col;
5005   }
5006   if (scall != MAT_INITIAL_MATRIX){
5007     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5008     aloc[0] = *A_loc;
5009   }
5010   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5011   *A_loc = aloc[0];
5012   ierr = PetscFree(aloc);CHKERRQ(ierr);
5013   if (!row){
5014     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5015   }
5016   if (!col){
5017     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5018   }
5019   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatGetBrowsOfAcols"
5025 /*@C
5026     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5027 
5028     Collective on Mat
5029 
5030    Input Parameters:
5031 +    A,B - the matrices in mpiaij format
5032 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5033 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5034 
5035    Output Parameter:
5036 +    rowb, colb - index sets of rows and columns of B to extract
5037 -    B_seq - the sequential matrix generated
5038 
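        Example usage (a sketch, MAT_INITIAL_MATRIX path):
     .vb
        IS  rowb,colb;
        Mat B_seq;
        ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
        /* ... rowb, colb, and B_seq can then be passed back in with MAT_REUSE_MATRIX ... */
     .ve
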
5039     Level: developer
5040 
5041 @*/
5042 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5043 {
5044   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5045   PetscErrorCode    ierr;
5046   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5047   IS                isrowb,iscolb;
5048   Mat               *bseq=PETSC_NULL;
5049 
5050   PetscFunctionBegin;
5051   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5052     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5053   }
5054   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5055 
5056   if (scall == MAT_INITIAL_MATRIX){
5057     start = A->cmap->rstart;
5058     cmap  = a->garray;
5059     nzA   = a->A->cmap->n;
5060     nzB   = a->B->cmap->n;
5061     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5062     ncols = 0;
5063     for (i=0; i<nzB; i++) {  /* row < local row index */
5064       if (cmap[i] < start) idx[ncols++] = cmap[i];
5065       else break;
5066     }
5067     imark = i;
5068     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5069     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5070     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5071     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5072   } else {
5073     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5074     isrowb = *rowb; iscolb = *colb;
5075     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5076     bseq[0] = *B_seq;
5077   }
5078   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5079   *B_seq = bseq[0];
5080   ierr = PetscFree(bseq);CHKERRQ(ierr);
5081   if (!rowb){
5082     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5083   } else {
5084     *rowb = isrowb;
5085   }
5086   if (!colb){
5087     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5088   } else {
5089     *colb = iscolb;
5090   }
5091   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5092   PetscFunctionReturn(0);
5093 }
5094 
5095 #undef __FUNCT__
5096 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5097 /*
5098     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5099     of the OFF-DIAGONAL portion of local A
5100 
5101     Collective on Mat
5102 
5103    Input Parameters:
5104 +    A,B - the matrices in mpiaij format
5105 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5106 
5107    Output Parameter:
5108 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5109 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5110 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5111 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5112 
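        Example usage (a sketch of the MAT_INITIAL_MATRIX path; the buffers are saved for a later
        MAT_REUSE_MATRIX call, while passing PETSC_NULL for startsj_s or bufa_ptr forces MAT_INITIAL_MATRIX):
     .vb
        PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
        MatScalar *bufa = PETSC_NULL;
        Mat       B_oth;
        ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     .ve
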
5113     Level: developer
5114 
5115 */
5116 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5117 {
5118   VecScatter_MPI_General *gen_to,*gen_from;
5119   PetscErrorCode         ierr;
5120   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5121   Mat_SeqAIJ             *b_oth;
5122   VecScatter             ctx=a->Mvctx;
5123   MPI_Comm               comm=((PetscObject)ctx)->comm;
5124   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5125   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5126   PetscScalar            *rvalues,*svalues;
5127   MatScalar              *b_otha,*bufa,*bufA;
5128   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5129   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5130   MPI_Status             *sstatus,rstatus;
5131   PetscMPIInt            jj;
5132   PetscInt               *cols,sbs,rbs;
5133   PetscScalar            *vals;
5134 
5135   PetscFunctionBegin;
5136   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5137     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5138   }
5139   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5140   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5141 
5142   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5143   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5144   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5145   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5146   nrecvs   = gen_from->n;
5147   nsends   = gen_to->n;
5148 
5149   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5150   srow     = gen_to->indices;   /* local row index to be sent */
5151   sstarts  = gen_to->starts;
5152   sprocs   = gen_to->procs;
5153   sstatus  = gen_to->sstatus;
5154   sbs      = gen_to->bs;
5155   rstarts  = gen_from->starts;
5156   rprocs   = gen_from->procs;
5157   rbs      = gen_from->bs;
5158 
5159   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5160   if (scall == MAT_INITIAL_MATRIX){
5161     /* i-array */
5162     /*---------*/
5163     /*  post receives */
5164     for (i=0; i<nrecvs; i++){
5165       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5166       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5167       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5168     }
5169 
5170     /* pack the outgoing message */
5171     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5172     sstartsj[0] = 0;  rstartsj[0] = 0;
5173     len = 0; /* total length of j or a array to be sent */
5174     k = 0;
5175     for (i=0; i<nsends; i++){
5176       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5177       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5178       for (j=0; j<nrows; j++) {
5179         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5180         for (l=0; l<sbs; l++){
5181           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5182           rowlen[j*sbs+l] = ncols;
5183           len += ncols;
5184           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5185         }
5186         k++;
5187       }
5188       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5189       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5190     }
5191     /* recvs and sends of i-array are completed */
5192     i = nrecvs;
5193     while (i--) {
5194       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5195     }
5196     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5197 
5198     /* allocate buffers for sending j and a arrays */
5199     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5200     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5201 
5202     /* create i-array of B_oth */
5203     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5204     b_othi[0] = 0;
5205     len = 0; /* total length of j or a array to be received */
5206     k = 0;
5207     for (i=0; i<nrecvs; i++){
5208       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5209       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5210       for (j=0; j<nrows; j++) {
5211         b_othi[k+1] = b_othi[k] + rowlen[j];
5212         len += rowlen[j]; k++;
5213       }
5214       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5215     }
5216 
5217     /* allocate space for j and a arrays of B_oth */
5218     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5219     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5220 
5221     /* j-array */
5222     /*---------*/
5223     /*  post receives of j-array */
5224     for (i=0; i<nrecvs; i++){
5225       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5226       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5227     }
5228 
5229     /* pack the outgoing message j-array */
5230     k = 0;
5231     for (i=0; i<nsends; i++){
5232       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5233       bufJ = bufj+sstartsj[i];
5234       for (j=0; j<nrows; j++) {
5235         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5236         for (ll=0; ll<sbs; ll++){
5237           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5238           for (l=0; l<ncols; l++){
5239             *bufJ++ = cols[l];
5240           }
5241           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5242         }
5243       }
5244       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5245     }
5246 
5247     /* recvs and sends of j-array are completed */
5248     i = nrecvs;
5249     while (i--) {
5250       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5251     }
5252     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5253   } else if (scall == MAT_REUSE_MATRIX){
5254     sstartsj = *startsj_s;
5255     rstartsj = *startsj_r;
5256     bufa     = *bufa_ptr;
5257     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5258     b_otha   = b_oth->a;
5259   } else {
5260     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5261   }
5262 
5263   /* a-array */
5264   /*---------*/
5265   /*  post receives of a-array */
5266   for (i=0; i<nrecvs; i++){
5267     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5268     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5269   }
5270 
5271   /* pack the outgoing message a-array */
5272   k = 0;
5273   for (i=0; i<nsends; i++){
5274     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5275     bufA = bufa+sstartsj[i];
5276     for (j=0; j<nrows; j++) {
5277       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5278       for (ll=0; ll<sbs; ll++){
5279         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5280         for (l=0; l<ncols; l++){
5281           *bufA++ = vals[l];
5282         }
5283         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5284       }
5285     }
5286     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5287   }
5288   /* recvs and sends of a-array are completed */
5289   i = nrecvs;
5290   while (i--) {
5291     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5292   }
5293   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5294   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5295 
5296   if (scall == MAT_INITIAL_MATRIX){
5297     /* put together the new matrix */
5298     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5299 
5300     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5301     /* Since these are PETSc arrays, change flags to free them as necessary. */
5302     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5303     b_oth->free_a  = PETSC_TRUE;
5304     b_oth->free_ij = PETSC_TRUE;
5305     b_oth->nonew   = 0;
5306 
5307     ierr = PetscFree(bufj);CHKERRQ(ierr);
5308     if (!startsj_s || !bufa_ptr){
5309       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5310       ierr = PetscFree(bufa);CHKERRQ(ierr); /* caller did not ask to keep the send buffer, so release it */
5311     } else {
5312       *startsj_s = sstartsj;
5313       *startsj_r = rstartsj;
5314       *bufa_ptr  = bufa;
5315     }
5316   }
5317   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5318   PetscFunctionReturn(0);
5319 }
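
/*
   Both the j-array and a-array exchanges above follow the nonblocking pattern used
   throughout this file: post every MPI_Irecv first, pack and MPI_Isend each outgoing
   buffer, then drain completions.  A minimal sketch of that pattern in isolation
   (illustrative only; rbuf/sbuf, rlen/slen, and PackBuffer are hypothetical and not
   part of this routine):

       for (i=0; i<nrecvs; i++) {
         ierr = MPI_Irecv(rbuf[i],rlen[i],MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
       }
       for (i=0; i<nsends; i++) {
         PackBuffer(sbuf[i]);       hypothetical packing step
         ierr = MPI_Isend(sbuf[i],slen[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
       }
       i = nrecvs;
       while (i--) {ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);}
       if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
*/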
5320 
5321 #undef __FUNCT__
5322 #define __FUNCT__ "MatGetCommunicationStructs"
5323 /*@C
5324   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5325 
5326   Not Collective
5327 
5328   Input Parameter:
5329 . A - The matrix of type MATMPIAIJ
5330 
5331   Output Parameters:
5332 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5333 . colmap - A map from global column index to local index into lvec
5334 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5335 
5336   Level: developer
5337 
5338 @*/
5339 #if defined (PETSC_USE_CTABLE)
5340 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5341 #else
5342 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5343 #endif
5344 {
5345   Mat_MPIAIJ *a;
5346 
5347   PetscFunctionBegin;
5348   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5349   PetscValidPointer(lvec, 2);
5350   PetscValidPointer(colmap, 3);
5351   PetscValidPointer(multScatter, 4);
5352   a = (Mat_MPIAIJ *) A->data;
5353   if (lvec) *lvec = a->lvec;
5354   if (colmap) *colmap = a->colmap;
5355   if (multScatter) *multScatter = a->Mvctx;
5356   PetscFunctionReturn(0);
5357 }
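
/*
   Example (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix):
   the type of the colmap argument depends on whether PETSc was configured
   with PETSC_USE_CTABLE.

       Vec        lvec;
       VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
       PetscTable colmap;
   #else
       PetscInt   *colmap;
   #endif
       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   The returned objects are owned by the matrix and must not be destroyed by
   the caller.
*/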
5358 
5359 EXTERN_C_BEGIN
5360 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5361 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5362 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5363 EXTERN_C_END
5364 
5365 #undef __FUNCT__
5366 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5367 /*
5368     Computes (B'*A')' since computing A*B directly is untenable
5369 
5370                n                       p                          p
5371         (              )       (              )         (                  )
5372       m (      A       )  *  n (       B      )   =   m (         C        )
5373         (              )       (              )         (                  )
5374 
5375 */
5376 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5377 {
5378   PetscErrorCode     ierr;
5379   Mat                At,Bt,Ct;
5380 
5381   PetscFunctionBegin;
5382   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5383   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5384   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5385   ierr = MatDestroy(&At);CHKERRQ(ierr);
5386   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5387   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5388   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5389   PetscFunctionReturn(0);
5390 }
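
/*
   Because this operation is composed on the matrix in MatCreate_MPIAIJ() below,
   users reach the routine above through the generic interface, e.g.

       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   with A of type MATMPIDENSE and B of type MATMPIAIJ.  The transpose trick works
   because C = A*B = (B'*A')', and B'*A' is a sparse-times-dense product for which
   a native kernel exists.
*/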
5391 
5392 #undef __FUNCT__
5393 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5394 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5395 {
5396   PetscErrorCode ierr;
5397   PetscInt       m=A->rmap->n,n=B->cmap->n;
5398   Mat            Cmat;
5399 
5400   PetscFunctionBegin;
5401   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5402   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5403   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5404   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5405   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5406   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5407   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5408   *C   = Cmat;
5409   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5410   PetscFunctionReturn(0);
5411 }
5412 
5413 /* ----------------------------------------------------------------*/
5414 #undef __FUNCT__
5415 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5416 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5417 {
5418   PetscErrorCode ierr;
5419 
5420   PetscFunctionBegin;
5421   if (scall == MAT_INITIAL_MATRIX){
5422     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5423   }
5424   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5425   PetscFunctionReturn(0);
5426 }
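
/*
   Sketch of the MatReuse protocol implemented above (hedged example; conforming
   sizes assumed): the first call builds C symbolically and numerically, later
   calls recompute its values in place after A or B change numerically.

       Mat C;
       ierr = MatMatMult_MPIDense_MPIAIJ(A,B,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
       ierr = MatMatMult_MPIDense_MPIAIJ(A,B,MAT_REUSE_MATRIX,1.0,&C);CHKERRQ(ierr);
*/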
5427 
5428 EXTERN_C_BEGIN
5429 #if defined(PETSC_HAVE_MUMPS)
5430 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5431 #endif
5432 #if defined(PETSC_HAVE_PASTIX)
5433 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5434 #endif
5435 #if defined(PETSC_HAVE_SUPERLU_DIST)
5436 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5437 #endif
5438 #if defined(PETSC_HAVE_SPOOLES)
5439 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5440 #endif
5441 EXTERN_C_END
5442 
5443 /*MC
5444    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5445 
5446    Options Database Keys:
5447 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5448 
5449   Level: beginner
5450 
5451 .seealso: MatCreateMPIAIJ()
5452 M*/
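
/*
   Example (a minimal creation sketch for MATMPIAIJ; comm, m, n, and the
   preallocation counts are placeholders):

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, call MatSetFromOptions(A) and select the type at run time
   with -mat_type mpiaij.
*/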
5453 
5454 EXTERN_C_BEGIN
5455 #undef __FUNCT__
5456 #define __FUNCT__ "MatCreate_MPIAIJ"
5457 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5458 {
5459   Mat_MPIAIJ     *b;
5460   PetscErrorCode ierr;
5461   PetscMPIInt    size;
5462 
5463   PetscFunctionBegin;
5464   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5465 
5466   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5467   B->data         = (void*)b;
5468   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5469   B->rmap->bs     = 1;
5470   B->assembled    = PETSC_FALSE;
5471 
5472   B->insertmode   = NOT_SET_VALUES;
5473   b->size         = size;
5474   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5475 
5476   /* build cache for off array entries formed */
5477   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5478   b->donotstash  = PETSC_FALSE;
5479   b->colmap      = 0;
5480   b->garray      = 0;
5481   b->roworiented = PETSC_TRUE;
5482 
5483   /* stuff used for matrix vector multiply */
5484   b->lvec      = PETSC_NULL;
5485   b->Mvctx     = PETSC_NULL;
5486 
5487   /* stuff for MatGetRow() */
5488   b->rowindices   = 0;
5489   b->rowvalues    = 0;
5490   b->getrowactive = PETSC_FALSE;
5491 
5492 #if defined(PETSC_HAVE_SPOOLES)
5493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5494                                      "MatGetFactor_mpiaij_spooles",
5495                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5496 #endif
5497 #if defined(PETSC_HAVE_MUMPS)
5498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5499                                      "MatGetFactor_aij_mumps",
5500                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5501 #endif
5502 #if defined(PETSC_HAVE_PASTIX)
5503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5504                                      "MatGetFactor_mpiaij_pastix",
5505                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5506 #endif
5507 #if defined(PETSC_HAVE_SUPERLU_DIST)
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5509                                      "MatGetFactor_mpiaij_superlu_dist",
5510                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5511 #endif
5512   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5513                                      "MatStoreValues_MPIAIJ",
5514                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5516                                      "MatRetrieveValues_MPIAIJ",
5517                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5519                                      "MatGetDiagonalBlock_MPIAIJ",
5520                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5521   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5522                                      "MatIsTranspose_MPIAIJ",
5523                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5524   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5525                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5526                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5528                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5529                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5531                                      "MatDiagonalScaleLocal_MPIAIJ",
5532                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5534                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5535                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5536   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5537                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5538                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5539   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5540                                      "MatConvert_MPIAIJ_MPISBAIJ",
5541                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5543                                      "MatMatMult_MPIDense_MPIAIJ",
5544                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5546                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5547                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5549                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5550                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
5554 EXTERN_C_END
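
/*
   The PetscObjectComposeFunctionDynamic() calls above attach named function
   pointers to the matrix; generic code later looks them up by name, which is
   how MatConvert() and MatGetFactor() dispatch without the core library linking
   against the optional packages.  A retrieval sketch:

       PetscErrorCode (*f)(Mat,MatFactorType,Mat*);
       ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetFactor_mumps_C",(void (**)(void))&f);CHKERRQ(ierr);
       if (f) {ierr = (*f)(A,MAT_FACTOR_LU,&F);CHKERRQ(ierr);}
*/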
5555 
5556 #undef __FUNCT__
5557 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5558 /*@
5559      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5560          and "off-diagonal" parts of the matrix in CSR format.
5561 
5562    Collective on MPI_Comm
5563 
5564    Input Parameters:
5565 +  comm - MPI communicator
5566 .  m - number of local rows (cannot be PETSC_DECIDE)
5567 .  n - number of local columns; this should be the same as the local size used in creating the
5568        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5569        calculated if N is given). For square matrices n is almost always m.
5570 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5571 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5572 .   i - row indices for "diagonal" portion of matrix
5573 .   j - column indices for "diagonal" portion (local to the diagonal block, in the range [0,n))
5574 .   a - matrix values for "diagonal" portion
5575 .   oi - row indices for "off-diagonal" portion of matrix
5576 .   oj - column indices for "off-diagonal" portion (global)
5577 -   oa - matrix values for "off-diagonal" portion
5578 
5579    Output Parameter:
5580 .   mat - the matrix
5581 
5582    Level: advanced
5583 
5584    Notes:
5585        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5586        must free the arrays once the matrix has been destroyed and not before.
5587 
5588        The i, j, oi, and oj indices are zero-based
5589 
5590        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5591 
5592        This sets local rows and cannot be used to set off-processor values.
5593 
5594        You cannot later use MatSetValues() to change values in this matrix.
5595 
5596 .keywords: matrix, aij, compressed row, sparse, parallel
5597 
5598 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5599           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5600 @*/
5601 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5602                                                PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
5603 {
5604   PetscErrorCode ierr;
5605   Mat_MPIAIJ     *maij;
5606 
5607   PetscFunctionBegin;
5608   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5609   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5610   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5611   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5612   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5613   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5614   maij = (Mat_MPIAIJ*) (*mat)->data;
5615   maij->donotstash     = PETSC_TRUE;
5616   (*mat)->preallocated = PETSC_TRUE;
5617 
5618   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5619   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5620   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5621   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5622 
5623   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5624   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5625 
5626   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630 
5631   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5632   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5633   PetscFunctionReturn(0);
5634 }
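
/*
   Example (a hedged two-process sketch): each process owns one row of a 2x2
   global matrix with a diagonal entry of 2 and one off-diagonal entry of -1;
   j holds the local diagonal-block column, oj the global off-diagonal column,
   and rank comes from MPI_Comm_rank().  The arrays must remain valid until
   the matrix is destroyed.

       PetscInt    i[2]  = {0,1},j[1]  = {0};
       PetscInt    oi[2] = {0,1},oj[1];
       PetscScalar a[1]  = {2.0},oa[1] = {-1.0};
       Mat         A;

       oj[0] = rank ? 0 : 1;   (global column of the single off-diagonal entry)
       ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                              i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/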
5635 
5636 /*
5637     Special version for direct calls from Fortran
5638 */
5639 #include <private/fortranimpl.h>
5640 
5641 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5642 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5643 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5644 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5645 #endif
5646 
5647 /* Change these macros so they can be used in a void function */
5648 #undef CHKERRQ
5649 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5650 #undef SETERRQ2
5651 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5652 #undef SETERRQ3
5653 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5654 #undef SETERRQ
5655 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
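
/*
   With the redefinitions above, an error detected inside the void Fortran stub
   below aborts instead of returning an error code it has no way to propagate;
   in effect each

       CHKERRQ(ierr);

   now behaves like

       if (ierr) MPI_Abort(PETSC_COMM_WORLD,ierr);
*/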
5656 
5657 EXTERN_C_BEGIN
5658 #undef __FUNCT__
5659 #define __FUNCT__ "matsetvaluesmpiaij_"
5660 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5661 {
5662   Mat             mat = *mmat;
5663   PetscInt        m = *mm, n = *mn;
5664   InsertMode      addv = *maddv;
5665   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5666   PetscScalar     value;
5667   PetscErrorCode  ierr;
5668 
5669   MatCheckPreallocated(mat,1);
5670   if (mat->insertmode == NOT_SET_VALUES) {
5671     mat->insertmode = addv;
5672   }
5673 #if defined(PETSC_USE_DEBUG)
5674   else if (mat->insertmode != addv) {
5675     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5676   }
5677 #endif
5678   {
5679   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5680   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5681   PetscBool       roworiented = aij->roworiented;
5682 
5683   /* Some Variables required in the macro */
5684   Mat             A = aij->A;
5685   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5686   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5687   MatScalar       *aa = a->a;
5688   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5689   Mat             B = aij->B;
5690   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5691   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5692   MatScalar       *ba = b->a;
5693 
5694   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5695   PetscInt        nonew = a->nonew;
5696   MatScalar       *ap1,*ap2;
5697 
5698   PetscFunctionBegin;
5699   for (i=0; i<m; i++) {
5700     if (im[i] < 0) continue;
5701 #if defined(PETSC_USE_DEBUG)
5702     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5703 #endif
5704     if (im[i] >= rstart && im[i] < rend) {
5705       row      = im[i] - rstart;
5706       lastcol1 = -1;
5707       rp1      = aj + ai[row];
5708       ap1      = aa + ai[row];
5709       rmax1    = aimax[row];
5710       nrow1    = ailen[row];
5711       low1     = 0;
5712       high1    = nrow1;
5713       lastcol2 = -1;
5714       rp2      = bj + bi[row];
5715       ap2      = ba + bi[row];
5716       rmax2    = bimax[row];
5717       nrow2    = bilen[row];
5718       low2     = 0;
5719       high2    = nrow2;
5720 
5721       for (j=0; j<n; j++) {
5722         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5723         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5724         if (in[j] >= cstart && in[j] < cend){
5725           col = in[j] - cstart;
5726           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5727         } else if (in[j] < 0) continue;
5728 #if defined(PETSC_USE_DEBUG)
5729         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5730 #endif
5731         else {
5732           if (mat->was_assembled) {
5733             if (!aij->colmap) {
5734               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5735             }
5736 #if defined (PETSC_USE_CTABLE)
5737             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5738             col--;
5739 #else
5740             col = aij->colmap[in[j]] - 1;
5741 #endif
5742             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5743               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5744               col =  in[j];
5745               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5746               B = aij->B;
5747               b = (Mat_SeqAIJ*)B->data;
5748               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5749               rp2      = bj + bi[row];
5750               ap2      = ba + bi[row];
5751               rmax2    = bimax[row];
5752               nrow2    = bilen[row];
5753               low2     = 0;
5754               high2    = nrow2;
5755               bm       = aij->B->rmap->n;
5756               ba       = b->a;
5757             }
5758           } else col = in[j];
5759           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5760         }
5761       }
5762     } else {
5763       if (!aij->donotstash) {
5764         if (roworiented) {
5765           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5766         } else {
5767           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5768         }
5769       }
5770     }
5771   }}
5772   PetscFunctionReturnVoid();
5773 }
5774 EXTERN_C_END
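
/*
   The MatSetValues_SeqAIJ_{A,B}_Private macros used in the stub above insert
   each value into a sorted row with a bisection search over [low,high).  A
   sketch of that search in isolation (hypothetical standalone helper, not
   part of PETSc):

       static PetscInt FindInsertCol(const PetscInt *rp,PetscInt low,PetscInt high,PetscInt col)
       {
         while (high - low > 5) {         (bisect while the window is large)
           PetscInt t = (low + high)/2;
           if (rp[t] > col) high = t;
           else             low  = t;
         }
         while (low < high && rp[low] < col) low++;   (finish with a linear scan)
         return low;                      (position of col, or its insertion point)
       }
*/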
5775 
5776