xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fba77e201f7e04bdb29755266e71eb546b1bb6b1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically
18    switches over to use inodes when enough consecutive rows share an identical nonzero pattern.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
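/*
   Example (a hedged sketch, not part of the manual page above): creating an AIJ
   matrix and calling both preallocation routines so the same code runs on one
   process or on many.  The sizes M,N and the per-row nonzero estimates (5 and 2)
   are illustrative assumptions.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);              takes effect on one process
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); takes effect on several
*/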
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
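/*
   A hedged usage sketch: because the type is registered in the options database,
   an application that calls MatSetFromOptions() can select this format at run
   time with -mat_type aijcrl instead of hardwiring MatSetType().

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/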
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
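/*
   A hedged usage sketch for the routine above: every process passes an array of
   length N (the global number of columns, since the result is Allreduced) and
   all processes receive the same norms.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/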
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
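/*
   A hedged usage sketch (names illustrative): rank 0 holds the square SeqAIJ
   matrix gmat; every rank supplies its local row count m, and the local row
   counts must sum to the global size.

     Mat Apar;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
*/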
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal nonzero counts for each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal nonzero counts for each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each process
330 has an order-N integer array) but is fast to access.
331 */
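/*
   Lookup pattern (a sketch mirroring its use in MatSetValues_MPIAIJ() below):
   gcol is a global column index; lcol becomes the local index into the
   off-diagonal block, or negative if gcol is absent.  The +1/-1 shift exists
   because 0 marks "not present".

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/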
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
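/* Insert value at (row,col) of the diagonal block A: locate col in the sorted
   row by a binary search narrowed to a linear scan (the previous bounds
   low1/high1 are reused when columns arrive in monotonic order); add or
   overwrite if found, otherwise grow the row (reallocating if necessary) and
   shift the later entries up.  Jumps to a_noinsert when nothing new is inserted. */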
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
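/* Identical to MatSetValues_SeqAIJ_A_Private() above, but operating on the
   off-diagonal block B */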
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
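  /* v is assumed to hold the entire sparse row in global column order:
     [off-diagonal entries left of the diagonal block | diagonal block | off-diagonal entries to the right] */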
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some variables required in the macros */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
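/*
   A hedged usage sketch: values for locally owned rows go directly into the
   diagonal (A) or off-diagonal (B) block above, while rows owned by another
   process are stashed and shipped during assembly.

     PetscInt    row,col;                 global indices; row may be off-process
     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/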
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
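  /* InsertMode has INSERT_VALUES = 1 and ADD_VALUES = 2, so the bitwise OR
     (MPI_BOR) across processes yields (ADD_VALUES|INSERT_VALUES) = 3 exactly
     when some processes inserted while others added */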
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
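/*
   Communication pattern (summarizing the code below): every process determines
   the owner of each row it wants zeroed and sends those global indices to the
   owners (message counts and lengths negotiated with PetscMaxSum() unless
   MAT_NO_OFF_PROC_ZERO_ROWS was set); each owner gathers the union into
   lrows[], zeroes those local rows, and, when x and b are given, fixes the
   right-hand side so that b = diag*x on the zeroed rows.
*/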
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_USE_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_USE_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_USE_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal entry,
838      allocating new memory and thus calling lots of mallocs and slowing things down.
839   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
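/*
   Uses the same rendezvous as MatZeroRows_MPIAIJ() above to collect the locally
   owned rows.  The square diagonal block is then handled by MatZeroRowsColumns()
   on l->A, while for the off-diagonal block a 0/1 mask of the zeroed rows is
   scattered into the ghost (column) ordering and every entry of B whose column
   hits the mask is zeroed, with b updated using the known values of x when x
   and b are provided.
*/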
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_USE_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_USE_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_USE_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of off process part of matrix zeroing removed columns*/
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
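/*
   Computes yy = A xx as (diagonal block)*xx plus (off-diagonal block)*(ghost
   values of xx); the ghost-value scatter is started before the local multiply
   so communication overlaps computation.
*/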
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
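/*
   Computes yy = A^T xx as (A_d)^T xx plus a reverse scatter-add of (A_o)^T xx,
   where A_d and A_o are the diagonal and off-diagonal blocks; the reverse
   scatter accumulates each process's contribution into the owning entries of yy.
*/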
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually
1136        added into yy until the VecScatterEnd() on the next line */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* values actually were received in the Begin() but we need to call this nop */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: symmetric diagonal block */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
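/*
   File layout written below: a four-entry header (MAT_FILE_CLASSID, M, N,
   global nz), then all row lengths, then all column indices, then all numerical
   values.  Rank 0 performs the writing while the other ranks stream their
   pieces to it under PetscViewerFlowControl to bound memory use.
*/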
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
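
/*
   Illustrative usage sketch (assuming a parallel AIJ matrix A and the usual ierr
   variable): viewing through a binary viewer dispatches to MatView_MPIAIJ_Binary()
   above, where rank 0 writes the header and then the row lengths, column indices,
   and values gathered from the other processes.

     PetscViewer v;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(A,v);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);

   The file can be read back with MatLoad() on any number of processes.
*/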
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1521 
1522     /* copy over the A part */
1523     Aloc = (Mat_SeqAIJ*)aij->A->data;
1524     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1525     row = mat->rmap->rstart;
1526     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1527     for (i=0; i<m; i++) {
1528       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1529       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1530     }
1531     aj = Aloc->j;
1532     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1533 
1534     /* copy over the B part */
1535     Aloc = (Mat_SeqAIJ*)aij->B->data;
1536     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537     row  = mat->rmap->rstart;
1538     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1539     ct   = cols;
1540     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1541     for (i=0; i<m; i++) {
1542       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1543       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1544     }
1545     ierr = PetscFree(ct);CHKERRQ(ierr);
1546     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     /*
1549        Everyone has to call to draw the matrix since the graphics waits are
1550        synchronized across all processors that share the PetscDraw object
1551     */
1552     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1553     if (!rank) {
1554       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1555       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1556       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1557       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1558     }
1559     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1560     ierr = MatDestroy(&A);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatView_MPIAIJ"
1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1568 {
1569   PetscErrorCode ierr;
1570   PetscBool      iascii,isdraw,issocket,isbinary;
1571 
1572   PetscFunctionBegin;
1573   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1574   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1577   if (iascii || isdraw || isbinary || issocket) {
1578     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1579   } else {
1580     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1581   }
1582   PetscFunctionReturn(0);
1583 }
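
/*
   Illustrative sketch (assuming a matrix A assembled on PETSC_COMM_WORLD): all of the
   supported viewer types dispatch through MatView_MPIAIJ(), e.g.

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Selecting PETSC_VIEWER_ASCII_INFO_DETAIL with PetscViewerSetFormat() prints the
   per-process nonzero and I-node statistics instead of the matrix entries.
*/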
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSOR_MPIAIJ"
1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1588 {
1589   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1590   PetscErrorCode ierr;
1591   Vec            bb1 = 0;
1592   PetscBool      hasop;
1593 
1594   PetscFunctionBegin;
1595   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1597   }
1598 
1599   if (flag == SOR_APPLY_UPPER) {
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1601     ierr = VecDestroy(&bb1);CHKERRQ(ierr); PetscFunctionReturn(0);
1602   }
1603 
1604   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1605     if (flag & SOR_ZERO_INITIAL_GUESS) {
1606       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607       its--;
1608     }
1609 
1610     while (its--) {
1611       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 
1614       /* update rhs: bb1 = bb - B*x */
1615       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1616       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1617 
1618       /* local sweep */
1619       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1620     }
1621   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626     while (its--) {
1627       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630       /* update rhs: bb1 = bb - B*x */
1631       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1632       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1633 
1634       /* local sweep */
1635       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1636     }
1637   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1638     if (flag & SOR_ZERO_INITIAL_GUESS) {
1639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1640       its--;
1641     }
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   }  else if (flag & SOR_EISENSTAT) {
1654     Vec         xx1;
1655 
1656     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1657     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1658 
1659     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (!mat->diag) {
1662       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1663       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1664     }
1665     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1666     if (hasop) {
1667       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1668     } else {
1669       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1670     }
1671     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1672 
1673     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1674 
1675     /* local sweep */
1676     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1677     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1678     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1679   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1680 
1681   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
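
/*
   A minimal sketch of how MatSOR_MPIAIJ() is typically reached (assuming a KSP ksp
   already configured with this matrix): the parallel "SOR" above is process-local
   relaxation with the off-process coupling moved into the right-hand side,
   bb1 = bb - B*x, so it behaves like block Jacobi with SOR inside each block.

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   PCSOR applies MatSOR() with the SOR_LOCAL_* flags handled above.
*/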
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatPermute_MPIAIJ"
1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1688 {
1689   MPI_Comm       comm;
1690   PetscInt       first,local_rowsize,local_colsize;
1691   const PetscInt *rows;
1692   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1697   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1698   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1699   /* collect the global row permutation and invert it */
1700   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1701   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1702   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1703   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1704   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1705   /* get the local target indices */
1706   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1707   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1708   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1709   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1710   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1711   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1712   /* the column permutation is so much easier;
1713      make a local version of 'colp' and invert it */
1714   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1715   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1718   /* now we just get the submatrix */
1719   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1720   /* clean up */
1721   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1722   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
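
/*
   Illustrative sketch (rowp and colp are assumed to be permutation index sets; as the
   code above notes, they may be serial, since ISOnComm() makes them collective):

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   B contains the rows and columns of A reordered by the two permutations.
*/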
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1728 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1729 {
1730   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1731   Mat            A = mat->A,B = mat->B;
1732   PetscErrorCode ierr;
1733   PetscReal      isend[5],irecv[5];
1734 
1735   PetscFunctionBegin;
1736   info->block_size     = 1.0;
1737   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1738   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1739   isend[3] = info->memory;  isend[4] = info->mallocs;
1740   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1741   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1742   isend[3] += info->memory;  isend[4] += info->mallocs;
1743   if (flag == MAT_LOCAL) {
1744     info->nz_used      = isend[0];
1745     info->nz_allocated = isend[1];
1746     info->nz_unneeded  = isend[2];
1747     info->memory       = isend[3];
1748     info->mallocs      = isend[4];
1749   } else if (flag == MAT_GLOBAL_MAX) {
1750     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1751     info->nz_used      = irecv[0];
1752     info->nz_allocated = irecv[1];
1753     info->nz_unneeded  = irecv[2];
1754     info->memory       = irecv[3];
1755     info->mallocs      = irecv[4];
1756   } else if (flag == MAT_GLOBAL_SUM) {
1757     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1758     info->nz_used      = irecv[0];
1759     info->nz_allocated = irecv[1];
1760     info->nz_unneeded  = irecv[2];
1761     info->memory       = irecv[3];
1762     info->mallocs      = irecv[4];
1763   }
1764   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1765   info->fill_ratio_needed = 0;
1766   info->factor_mallocs    = 0;
1767 
1768   PetscFunctionReturn(0);
1769 }
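
/*
   Illustrative sketch: MAT_LOCAL returns this process's counts, while MAT_GLOBAL_MAX
   and MAT_GLOBAL_SUM reduce them over the communicator as above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/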
1770 
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatSetOption_MPIAIJ"
1773 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1774 {
1775   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   switch (op) {
1780   case MAT_NEW_NONZERO_LOCATIONS:
1781   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1782   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1783   case MAT_KEEP_NONZERO_PATTERN:
1784   case MAT_NEW_NONZERO_LOCATION_ERR:
1785   case MAT_USE_INODES:
1786   case MAT_IGNORE_ZERO_ENTRIES:
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_ROW_ORIENTED:
1791     a->roworiented = flg;
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_NEW_DIAGONALS:
1796     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1797     break;
1798   case MAT_IGNORE_OFF_PROC_ENTRIES:
1799     a->donotstash = flg;
1800     break;
1801   case MAT_SPD:
1802     A->spd_set                         = PETSC_TRUE;
1803     A->spd                             = flg;
1804     if (flg) {
1805       A->symmetric                     = PETSC_TRUE;
1806       A->structurally_symmetric        = PETSC_TRUE;
1807       A->symmetric_set                 = PETSC_TRUE;
1808       A->structurally_symmetric_set    = PETSC_TRUE;
1809     }
1810     break;
1811   case MAT_SYMMETRIC:
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_STRUCTURALLY_SYMMETRIC:
1815     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_HERMITIAN:
1818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_SYMMETRY_ETERNAL:
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     break;
1823   default:
1824     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1825   }
1826   PetscFunctionReturn(0);
1827 }
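
/*
   Illustrative sketch: most options are forwarded to the sequential diagonal (a->A) and
   off-diagonal (a->B) blocks; a few, such as the one below, only set flags on the
   parallel object itself (here: do not stash entries destined for other processes).

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/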
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatGetRow_MPIAIJ"
1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1834   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1835   PetscErrorCode ierr;
1836   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1837   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1838   PetscInt       *cmap,*idx_p;
1839 
1840   PetscFunctionBegin;
1841   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1842   mat->getrowactive = PETSC_TRUE;
1843 
1844   if (!mat->rowvalues && (idx || v)) {
1845     /*
1846         allocate enough space to hold information from the longest row.
1847     */
1848     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1849     PetscInt   max = 1,tmp;
1850     for (i=0; i<matin->rmap->n; i++) {
1851       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1852       if (max < tmp) { max = tmp; }
1853     }
1854     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1855   }
1856 
1857   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1858   lrow = row - rstart;
1859 
1860   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1861   if (!v)   {pvA = 0; pvB = 0;}
1862   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1863   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1864   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1865   nztot = nzA + nzB;
1866 
1867   cmap  = mat->garray;
1868   if (v  || idx) {
1869     if (nztot) {
1870       /* Sort by increasing column numbers, assuming A and B already sorted */
1871       PetscInt imark = -1;
1872       if (v) {
1873         *v = v_p = mat->rowvalues;
1874         for (i=0; i<nzB; i++) {
1875           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1876           else break;
1877         }
1878         imark = i;
1879         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1880         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1881       }
1882       if (idx) {
1883         *idx = idx_p = mat->rowindices;
1884         if (imark > -1) {
1885           for (i=0; i<imark; i++) {
1886             idx_p[i] = cmap[cworkB[i]];
1887           }
1888         } else {
1889           for (i=0; i<nzB; i++) {
1890             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1891             else break;
1892           }
1893           imark = i;
1894         }
1895         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1896         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1897       }
1898     } else {
1899       if (idx) *idx = 0;
1900       if (v)   *v   = 0;
1901     }
1902   }
1903   *nz = nztot;
1904   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1905   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1911 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1912 {
1913   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1914 
1915   PetscFunctionBegin;
1916   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1917   aij->getrowactive = PETSC_FALSE;
1918   PetscFunctionReturn(0);
1919 }
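
/*
   Illustrative sketch of the usual access pattern for the two routines above (assuming
   an assembled MPIAIJ matrix A): only locally owned rows may be requested, and every
   MatGetRow() must be paired with a MatRestoreRow().

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          row,ncols,rstart,rend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }

   Between the two calls, cols[] holds sorted global column indices and vals[] the
   matching entries, merged from the diagonal and off-diagonal blocks as above.
*/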
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatNorm_MPIAIJ"
1923 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1924 {
1925   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1926   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1927   PetscErrorCode ierr;
1928   PetscInt       i,j,cstart = mat->cmap->rstart;
1929   PetscReal      sum = 0.0;
1930   MatScalar      *v;
1931 
1932   PetscFunctionBegin;
1933   if (aij->size == 1) {
1934     ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1935   } else {
1936     if (type == NORM_FROBENIUS) {
1937       v = amat->a;
1938       for (i=0; i<amat->nz; i++) {
1939 #if defined(PETSC_USE_COMPLEX)
1940         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1941 #else
1942         sum += (*v)*(*v); v++;
1943 #endif
1944       }
1945       v = bmat->a;
1946       for (i=0; i<bmat->nz; i++) {
1947 #if defined(PETSC_USE_COMPLEX)
1948         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1949 #else
1950         sum += (*v)*(*v); v++;
1951 #endif
1952       }
1953       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955     } else if (type == NORM_1) { /* max column norm */
1956       PetscReal *tmp,*tmp2;
1957       PetscInt  *jj,*garray = aij->garray;
1958       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1959       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1960       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976     } else if (type == NORM_INFINITY) { /* max row norm */
1977       PetscReal ntemp = 0.0;
1978       for (j=0; j<aij->A->rmap->n; j++) {
1979         v = amat->a + amat->i[j];
1980         sum = 0.0;
1981         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1982           sum += PetscAbsScalar(*v); v++;
1983         }
1984         v = bmat->a + bmat->i[j];
1985         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1986           sum += PetscAbsScalar(*v); v++;
1987         }
1988         if (sum > ntemp) ntemp = sum;
1989       }
1990       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1991     } else {
1992       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1993     }
1994   }
1995   PetscFunctionReturn(0);
1996 }
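
/*
   Illustrative sketch: the Frobenius, one, and infinity norms are supported in
   parallel; NORM_2 is not (see the SETERRQ above).

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/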
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatTranspose_MPIAIJ"
2000 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2001 {
2002   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2003   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2004   PetscErrorCode ierr;
2005   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2006   PetscInt       cstart=A->cmap->rstart,ncol;
2007   Mat            B;
2008   MatScalar      *array;
2009 
2010   PetscFunctionBegin;
2011   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2012 
2013   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2014   ai = Aloc->i; aj = Aloc->j;
2015   bi = Bloc->i; bj = Bloc->j;
2016   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2017     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2018     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2019     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++){
2021       d_nnz[aj[i]] ++;
2022       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2023     }
2024 
2025     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2026     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2027     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2028     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2029     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2030   } else {
2031     B = *matout;
2032   }
2033 
2034   /* copy over the A part */
2035   array = Aloc->a;
2036   row = A->rmap->rstart;
2037   for (i=0; i<ma; i++) {
2038     ncol = ai[i+1]-ai[i];
2039     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2040     row++; array += ncol; aj += ncol;
2041   }
2042   aj = Aloc->j;
2043   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2044 
2045   /* copy over the B part */
2046   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2047   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2048   array = Bloc->a;
2049   row = A->rmap->rstart;
2050   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2051   cols_tmp = cols;
2052   for (i=0; i<mb; i++) {
2053     ncol = bi[i+1]-bi[i];
2054     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2055     row++; array += ncol; cols_tmp += ncol;
2056   }
2057   ierr = PetscFree(cols);CHKERRQ(ierr);
2058 
2059   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2061   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2062     *matout = B;
2063   } else {
2064     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2065   }
2066   PetscFunctionReturn(0);
2067 }
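
/*
   Illustrative sketch: both reuse modes route through the routine above. The second
   call below transposes in place, which requires a square matrix.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/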
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2071 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2072 {
2073   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2074   Mat            a = aij->A,b = aij->B;
2075   PetscErrorCode ierr;
2076   PetscInt       s1,s2,s3;
2077 
2078   PetscFunctionBegin;
2079   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2080   if (rr) {
2081     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2082     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2083     /* Overlap communication with computation. */
2084     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2085   }
2086   if (ll) {
2087     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2088     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2089     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2090   }
2091   /* scale the diagonal block */
2092   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2093 
2094   if (rr) {
2095     /* Do a scatter end and then right scale the off-diagonal block */
2096     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2097     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2098   }
2099 
2100   PetscFunctionReturn(0);
2101 }
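
/*
   Illustrative sketch: scales mat to diag(ll)*mat*diag(rr); pass PETSC_NULL for a side
   that should not be scaled. Left scaling is purely local, while right scaling first
   scatters rr into the ghosted vector lvec, overlapping communication as above.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/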
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2105 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2106 {
2107   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2112   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2113   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2114   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2119 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2120 {
2121   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2122   PetscErrorCode ierr;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatEqual_MPIAIJ"
2131 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2132 {
2133   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2134   Mat            a,b,c,d;
2135   PetscBool      flg;
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   a = matA->A; b = matA->B;
2140   c = matB->A; d = matB->B;
2141 
2142   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2143   if (flg) {
2144     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2145   }
2146   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatCopy_MPIAIJ"
2152 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2153 {
2154   PetscErrorCode ierr;
2155   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2156   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2157 
2158   PetscFunctionBegin;
2159   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2160   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2161     /* because of the column compression in the off-processor part of the matrix a->B,
2162        the number of columns in a->B and b->B may be different, hence we cannot call
2163        the MatCopy() directly on the two parts. If need be, we can provide a more
2164        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2165        then copying the submatrices */
2166     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2167   } else {
2168     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2169     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2170   }
2171   PetscFunctionReturn(0);
2172 }
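
/*
   Illustrative sketch: the fast block-wise path above requires matching nonzero
   patterns and matching implementations; anything else falls back to MatCopy_Basic().

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/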
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2176 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2177 {
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatAXPY_MPIAIJ"
2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   PetscInt       i;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x = (Mat_SeqAIJ *)xx->A->data;
2199     y = (Mat_SeqAIJ *)yy->A->data;
2200     bnz = PetscBLASIntCast(x->nz);
2201     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2202     x = (Mat_SeqAIJ *)xx->B->data;
2203     y = (Mat_SeqAIJ *)yy->B->data;
2204     bnz = PetscBLASIntCast(x->nz);
2205     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2206   } else if (str == SUBSET_NONZERO_PATTERN) {
2207     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2208 
2209     x = (Mat_SeqAIJ *)xx->B->data;
2210     y = (Mat_SeqAIJ *)yy->B->data;
2211     if (y->xtoy && y->XtoY != xx->B) {
2212       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2213       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2214     }
2215     if (!y->xtoy) { /* get xtoy */
2216       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2217       y->XtoY = xx->B;
2218       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2219     }
2220     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2221   } else {
2222     Mat B;
2223     PetscInt *nnz_d,*nnz_o;
2224     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2225     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2226     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2227     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2228     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2229     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2230     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2231     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2232     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2233     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2234     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2235     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2236     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2237   }
2238   PetscFunctionReturn(0);
2239 }
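
/*
   Illustrative sketch: computes Y = a*X + Y. SAME_NONZERO_PATTERN applies a BLAS axpy
   directly to the stored values, SUBSET_NONZERO_PATTERN builds (and caches) an x-to-y
   index map for the off-diagonal block, and any other structure preallocates and
   assembles a fresh matrix.

     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/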
2240 
2241 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatConjugate_MPIAIJ"
2245 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2246 {
2247 #if defined(PETSC_USE_COMPLEX)
2248   PetscErrorCode ierr;
2249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2253   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2254 #else
2255   PetscFunctionBegin;
2256 #endif
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatRealPart_MPIAIJ"
2262 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2263 {
2264   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2265   PetscErrorCode ierr;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2269   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2275 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2276 {
2277   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2282   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #if defined(PETSC_HAVE_PBGL)
2287 
2288 #include <boost/parallel/mpi/bsp_process_group.hpp>
2289 #include <boost/graph/distributed/ilu_default_graph.hpp>
2290 #include <boost/graph/distributed/ilu_0_block.hpp>
2291 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2292 #include <boost/graph/distributed/petsc/interface.hpp>
2293 #include <boost/multi_array.hpp>
2294 #include <boost/parallel/distributed_property_map.hpp>
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2298 /*
2299   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2300 */
2301 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2302 {
2303   namespace petsc = boost::distributed::petsc;
2304 
2305   namespace graph_dist = boost::graph::distributed;
2306   using boost::graph::distributed::ilu_default::process_group_type;
2307   using boost::graph::ilu_permuted;
2308 
2309   PetscBool       row_identity, col_identity;
2310   PetscContainer  c;
2311   PetscInt        m, n, M, N;
2312   PetscErrorCode  ierr;
2313 
2314   PetscFunctionBegin;
2315   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2316   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2317   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2318   if (!row_identity || !col_identity) {
2319     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2320   }
2321 
2322   process_group_type pg;
2323   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2324   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2325   lgraph_type&   level_graph = *lgraph_p;
2326   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2327 
2328   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2329   ilu_permuted(level_graph);
2330 
2331   /* put together the new matrix */
2332   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2333   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2334   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2335   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2336   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2337   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339 
2340   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2341   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2342   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2343   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2349 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2350 {
2351   PetscFunctionBegin;
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatSolve_MPIAIJ"
2357 /*
2358   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2359 */
2360 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2361 {
2362   namespace graph_dist = boost::graph::distributed;
2363 
2364   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2365   lgraph_type*   lgraph_p;
2366   PetscContainer c;
2367   PetscErrorCode ierr;
2368 
2369   PetscFunctionBegin;
2370   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2371   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2372   ierr = VecCopy(b, x);CHKERRQ(ierr);
2373 
2374   PetscScalar* array_x;
2375   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2376   PetscInt sx;
2377   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2378 
2379   PetscScalar* array_b;
2380   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2381   PetscInt sb;
2382   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2383 
2384   lgraph_type&   level_graph = *lgraph_p;
2385   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2386 
2387   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2388   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2389                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2390 
2391   typedef boost::iterator_property_map<array_ref_type::iterator,
2392                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2393   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2394                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2395 
2396   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2397 
2398   PetscFunctionReturn(0);
2399 }
2400 #endif
2401 
2402 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2403   PetscInt       nzlocal,nsends,nrecvs;
2404   PetscMPIInt    *send_rank,*recv_rank;
2405   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2406   PetscScalar    *sbuf_a,**rbuf_a;
2407   PetscErrorCode (*Destroy)(Mat);
2408 } Mat_Redundant;
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2412 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2413 {
2414   PetscErrorCode       ierr;
2415   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2416   PetscInt             i;
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2420   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2421   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2422   for (i=0; i<redund->nrecvs; i++){
2423     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2424     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2425   }
2426   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2427   ierr = PetscFree(redund);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatDestroy_MatRedundant"
2433 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2434 {
2435   PetscErrorCode  ierr;
2436   PetscContainer  container;
2437   Mat_Redundant   *redund=PETSC_NULL;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2441   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2442   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2443   A->ops->destroy = redund->Destroy;
2444   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2445   if (A->ops->destroy) {
2446     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2447   }
2448   PetscFunctionReturn(0);
2449 }
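
/*
   Illustrative sketch (assuming subcomm was created with nsubcomm subgroups, e.g. via
   PetscSubcommCreate()): every subcommunicator receives a complete copy of the matrix,
   assembled by the routine below from the rows exchanged with MPI_Isend()/MPI_Irecv().

     Mat red;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
*/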
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2453 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2454 {
2455   PetscMPIInt    rank,size;
2456   MPI_Comm       comm=((PetscObject)mat)->comm;
2457   PetscErrorCode ierr;
2458   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2459   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2460   PetscInt       *rowrange=mat->rmap->range;
2461   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2462   Mat            A=aij->A,B=aij->B,C=*matredundant;
2463   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2464   PetscScalar    *sbuf_a;
2465   PetscInt       nzlocal=a->nz+b->nz;
2466   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2467   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2468   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2469   MatScalar      *aworkA,*aworkB;
2470   PetscScalar    *vals;
2471   PetscMPIInt    tag1,tag2,tag3,imdex;
2472   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2473                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2474   MPI_Status     recv_status,*send_status;
2475   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2476   PetscInt       **rbuf_j=PETSC_NULL;
2477   PetscScalar    **rbuf_a=PETSC_NULL;
2478   Mat_Redundant  *redund=PETSC_NULL;
2479   PetscContainer container;
2480 
2481   PetscFunctionBegin;
2482   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2483   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2484 
2485   if (reuse == MAT_REUSE_MATRIX) {
2486     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2487     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2488     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2489     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2490     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2491     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2492     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2493     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2494 
2495     nsends    = redund->nsends;
2496     nrecvs    = redund->nrecvs;
2497     send_rank = redund->send_rank;
2498     recv_rank = redund->recv_rank;
2499     sbuf_nz   = redund->sbuf_nz;
2500     rbuf_nz   = redund->rbuf_nz;
2501     sbuf_j    = redund->sbuf_j;
2502     sbuf_a    = redund->sbuf_a;
2503     rbuf_j    = redund->rbuf_j;
2504     rbuf_a    = redund->rbuf_a;
2505   }
2506 
2507   if (reuse == MAT_INITIAL_MATRIX){
2508     PetscMPIInt  subrank,subsize;
2509     PetscInt     nleftover,np_subcomm;
2510     /* get the destination processors' id send_rank, nsends and nrecvs */
2511     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2512     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2513     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2514     np_subcomm = size/nsubcomm;
2515     nleftover  = size - nsubcomm*np_subcomm;
2516     nsends = 0; nrecvs = 0;
2517     for (i=0; i<size; i++){ /* i = rank */
2518       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2519         send_rank[nsends] = i; nsends++;
2520         recv_rank[nrecvs++] = i;
2521       }
2522     }
2523     if (rank >= size - nleftover){/* this proc is a leftover processor */
2524       i = size-nleftover-1;
2525       j = 0;
2526       while (j < nsubcomm - nleftover){
2527         send_rank[nsends++] = i;
2528         i--; j++;
2529       }
2530     }
2531 
2532     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2533       for (i=0; i<nleftover; i++){
2534         recv_rank[nrecvs++] = size-nleftover+i;
2535       }
2536     }
2537 
2538     /* allocate sbuf_j, sbuf_a */
2539     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2540     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2541     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2542   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2543 
2544   /* copy mat's local entries into the buffers */
2545   if (reuse == MAT_INITIAL_MATRIX){
2546     rownz_max = 0;
2547     rptr = sbuf_j;
2548     cols = sbuf_j + rend-rstart + 1;
2549     vals = sbuf_a;
2550     rptr[0] = 0;
2551     for (i=0; i<rend-rstart; i++){
2552       row = i + rstart;
2553       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2554       ncols  = nzA + nzB;
2555       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2556       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2557       /* load the column indices for this row into cols */
2558       lwrite = 0;
2559       for (l=0; l<nzB; l++) {
2560         if ((ctmp = bmap[cworkB[l]]) < cstart){
2561           vals[lwrite]   = aworkB[l];
2562           cols[lwrite++] = ctmp;
2563         }
2564       }
2565       for (l=0; l<nzA; l++){
2566         vals[lwrite]   = aworkA[l];
2567         cols[lwrite++] = cstart + cworkA[l];
2568       }
2569       for (l=0; l<nzB; l++) {
2570         if ((ctmp = bmap[cworkB[l]]) >= cend){
2571           vals[lwrite]   = aworkB[l];
2572           cols[lwrite++] = ctmp;
2573         }
2574       }
2575       vals += ncols;
2576       cols += ncols;
2577       rptr[i+1] = rptr[i] + ncols;
2578       if (rownz_max < ncols) rownz_max = ncols;
2579     }
2580     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2581   } else { /* only copy matrix values into sbuf_a */
2582     rptr = sbuf_j;
2583     vals = sbuf_a;
2584     rptr[0] = 0;
2585     for (i=0; i<rend-rstart; i++){
2586       row = i + rstart;
2587       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2588       ncols  = nzA + nzB;
2589       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2590       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2591       lwrite = 0;
2592       for (l=0; l<nzB; l++) {
2593         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2594       }
2595       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2596       for (l=0; l<nzB; l++) {
2597         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2598       }
2599       vals += ncols;
2600       rptr[i+1] = rptr[i] + ncols;
2601     }
2602   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2603 
2604   /* send nzlocal to others, and recv other's nzlocal */
2605   /*--------------------------------------------------*/
2606   if (reuse == MAT_INITIAL_MATRIX){
2607     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2608     s_waits2 = s_waits3 + nsends;
2609     s_waits1 = s_waits2 + nsends;
2610     r_waits1 = s_waits1 + nsends;
2611     r_waits2 = r_waits1 + nrecvs;
2612     r_waits3 = r_waits2 + nrecvs;
2613   } else {
2614     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2615     r_waits3 = s_waits3 + nsends;
2616   }
2617 
2618   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2619   if (reuse == MAT_INITIAL_MATRIX){
2620     /* get new tags to keep the communication clean */
2621     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2622     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2623     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2624 
2625     /* post receives of others' nzlocal */
2626     for (i=0; i<nrecvs; i++){
2627       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2628     }
2629     /* send nzlocal to others */
2630     for (i=0; i<nsends; i++){
2631       sbuf_nz[i] = nzlocal;
2632       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2633     }
2634     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2635     count = nrecvs;
2636     while (count) {
2637       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2638       recv_rank[imdex] = recv_status.MPI_SOURCE;
2639       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2640       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2641 
2642       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2643       rbuf_nz[imdex] += i + 2;
2644       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2645       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2646       count--;
2647     }
2648     /* wait on sends of nzlocal */
2649     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2650     /* send mat->i,j to others, and recv from others */
2651     /*------------------------------------------------*/
2652     for (i=0; i<nsends; i++){
2653       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2654       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2655     }
2656     /* wait on receives of mat->i,j */
2657     /*------------------------------*/
2658     count = nrecvs;
2659     while (count) {
2660       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2661       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2662       count--;
2663     }
2664     /* wait on sends of mat->i,j */
2665     /*---------------------------*/
2666     if (nsends) {
2667       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2668     }
2669   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2670 
2671   /* post receives, send and receive mat->a */
2672   /*----------------------------------------*/
2673   for (imdex=0; imdex<nrecvs; imdex++) {
2674     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2675   }
2676   for (i=0; i<nsends; i++){
2677     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2678   }
2679   count = nrecvs;
2680   while (count) {
2681     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2682     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2683     count--;
2684   }
2685   if (nsends) {
2686     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2687   }
2688 
2689   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2690 
2691   /* create redundant matrix */
2692   /*-------------------------*/
2693   if (reuse == MAT_INITIAL_MATRIX){
2694     /* compute rownz_max for preallocation */
2695     for (imdex=0; imdex<nrecvs; imdex++){
2696       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2697       rptr = rbuf_j[imdex];
2698       for (i=0; i<j; i++){
2699         ncols = rptr[i+1] - rptr[i];
2700         if (rownz_max < ncols) rownz_max = ncols;
2701       }
2702     }
2703 
2704     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2705     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2706     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2707     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2708     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2709   } else {
2710     C = *matredundant;
2711   }
2712 
2713   /* insert local matrix entries */
2714   rptr = sbuf_j;
2715   cols = sbuf_j + rend-rstart + 1;
2716   vals = sbuf_a;
2717   for (i=0; i<rend-rstart; i++){
2718     row   = i + rstart;
2719     ncols = rptr[i+1] - rptr[i];
2720     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2721     vals += ncols;
2722     cols += ncols;
2723   }
2724   /* insert received matrix entries */
2725   for (imdex=0; imdex<nrecvs; imdex++){
2726     rstart = rowrange[recv_rank[imdex]];
2727     rend   = rowrange[recv_rank[imdex]+1];
2728     rptr = rbuf_j[imdex];
2729     cols = rbuf_j[imdex] + rend-rstart + 1;
2730     vals = rbuf_a[imdex];
2731     for (i=0; i<rend-rstart; i++){
2732       row   = i + rstart;
2733       ncols = rptr[i+1] - rptr[i];
2734       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2735       vals += ncols;
2736       cols += ncols;
2737     }
2738   }
2739   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2740   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2741   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2742   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2743   if (reuse == MAT_INITIAL_MATRIX) {
2744     PetscContainer container;
2745     *matredundant = C;
2746     /* create a supporting struct and attach it to C for reuse */
2747     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2748     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2749     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2750     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2751     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2752     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2753 
2754     redund->nzlocal = nzlocal;
2755     redund->nsends  = nsends;
2756     redund->nrecvs  = nrecvs;
2757     redund->send_rank = send_rank;
2758     redund->recv_rank = recv_rank;
2759     redund->sbuf_nz = sbuf_nz;
2760     redund->rbuf_nz = rbuf_nz;
2761     redund->sbuf_j  = sbuf_j;
2762     redund->sbuf_a  = sbuf_a;
2763     redund->rbuf_j  = rbuf_j;
2764     redund->rbuf_a  = rbuf_a;
2765 
2766     redund->Destroy = C->ops->destroy;
2767     C->ops->destroy = MatDestroy_MatRedundant;
2768   }
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2774 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2775 {
2776   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2777   PetscErrorCode ierr;
2778   PetscInt       i,*idxb = 0;
2779   PetscScalar    *va,*vb;
2780   Vec            vtmp;
2781 
2782   PetscFunctionBegin;
2783   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2784   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2785   if (idx) {
2786     for (i=0; i<A->rmap->n; i++) {
2787       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2788     }
2789   }
2790 
2791   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2792   if (idx) {
2793     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2794   }
2795   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2796   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2797 
2798   for (i=0; i<A->rmap->n; i++){
2799     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2800       va[i] = vb[i];
2801       if (idx) idx[i] = a->garray[idxb[i]];
2802     }
2803   }
2804 
2805   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2806   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2807   ierr = PetscFree(idxb);CHKERRQ(ierr);
2808   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2809   PetscFunctionReturn(0);
2810 }
2811 
2812 #undef __FUNCT__
2813 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2814 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2815 {
2816   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2817   PetscErrorCode ierr;
2818   PetscInt       i,*idxb = 0;
2819   PetscScalar    *va,*vb;
2820   Vec            vtmp;
2821 
2822   PetscFunctionBegin;
2823   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2824   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2825   if (idx) {
2826     for (i=0; i<A->rmap->n; i++) {
2827       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2828     }
2829   }
2830 
2831   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2832   if (idx) {
2833     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2834   }
2835   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2836   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2837 
2838   for (i=0; i<A->rmap->n; i++){
2839     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2840       va[i] = vb[i];
2841       if (idx) idx[i] = a->garray[idxb[i]];
2842     }
2843   }
2844 
2845   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2846   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2847   ierr = PetscFree(idxb);CHKERRQ(ierr);
2848   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2854 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2855 {
2856   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2857   PetscInt       n      = A->rmap->n;
2858   PetscInt       cstart = A->cmap->rstart;
2859   PetscInt      *cmap   = mat->garray;
2860   PetscInt      *diagIdx, *offdiagIdx;
2861   Vec            diagV, offdiagV;
2862   PetscScalar   *a, *diagA, *offdiagA;
2863   PetscInt       r;
2864   PetscErrorCode ierr;
2865 
2866   PetscFunctionBegin;
2867   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2868   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2869   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2870   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2871   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2872   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2873   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2874   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2875   for (r = 0; r < n; ++r) {
2876     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2877       a[r]   = diagA[r];
2878       if (idx) idx[r] = cstart + diagIdx[r];
2879     } else {
2880       a[r]   = offdiagA[r];
2881       if (idx) idx[r] = cmap[offdiagIdx[r]];
2882     }
2883   }
2884   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2885   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2886   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2887   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2888   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2889   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2895 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2896 {
2897   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2898   PetscInt       n      = A->rmap->n;
2899   PetscInt       cstart = A->cmap->rstart;
2900   PetscInt      *cmap   = mat->garray;
2901   PetscInt      *diagIdx, *offdiagIdx;
2902   Vec            diagV, offdiagV;
2903   PetscScalar   *a, *diagA, *offdiagA;
2904   PetscInt       r;
2905   PetscErrorCode ierr;
2906 
2907   PetscFunctionBegin;
2908   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2909   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2910   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2911   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2912   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2913   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2914   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2915   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2916   for (r = 0; r < n; ++r) {
2917     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2918       a[r]   = diagA[r];
2919       if (idx) idx[r] = cstart + diagIdx[r];
2920     } else {
2921       a[r]   = offdiagA[r];
2922       if (idx) idx[r] = cmap[offdiagIdx[r]];
2923     }
2924   }
2925   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2926   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2928   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2929   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2930   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2936 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2937 {
2938   PetscErrorCode ierr;
2939   Mat            *dummy;
2940 
2941   PetscFunctionBegin;
2942   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2943   *newmat = *dummy;
2944   ierr = PetscFree(dummy);CHKERRQ(ierr);
2945   PetscFunctionReturn(0);
2946 }
2947 
2948 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2952 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2953 {
2954   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 
2963 /* -------------------------------------------------------------------*/
2964 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2965        MatGetRow_MPIAIJ,
2966        MatRestoreRow_MPIAIJ,
2967        MatMult_MPIAIJ,
2968 /* 4*/ MatMultAdd_MPIAIJ,
2969        MatMultTranspose_MPIAIJ,
2970        MatMultTransposeAdd_MPIAIJ,
2971 #ifdef PETSC_HAVE_PBGL
2972        MatSolve_MPIAIJ,
2973 #else
2974        0,
2975 #endif
2976        0,
2977        0,
2978 /*10*/ 0,
2979        0,
2980        0,
2981        MatSOR_MPIAIJ,
2982        MatTranspose_MPIAIJ,
2983 /*15*/ MatGetInfo_MPIAIJ,
2984        MatEqual_MPIAIJ,
2985        MatGetDiagonal_MPIAIJ,
2986        MatDiagonalScale_MPIAIJ,
2987        MatNorm_MPIAIJ,
2988 /*20*/ MatAssemblyBegin_MPIAIJ,
2989        MatAssemblyEnd_MPIAIJ,
2990        MatSetOption_MPIAIJ,
2991        MatZeroEntries_MPIAIJ,
2992 /*24*/ MatZeroRows_MPIAIJ,
2993        0,
2994 #ifdef PETSC_HAVE_PBGL
2995        0,
2996 #else
2997        0,
2998 #endif
2999        0,
3000        0,
3001 /*29*/ MatSetUpPreallocation_MPIAIJ,
3002 #ifdef PETSC_HAVE_PBGL
3003        0,
3004 #else
3005        0,
3006 #endif
3007        0,
3008        0,
3009        0,
3010 /*34*/ MatDuplicate_MPIAIJ,
3011        0,
3012        0,
3013        0,
3014        0,
3015 /*39*/ MatAXPY_MPIAIJ,
3016        MatGetSubMatrices_MPIAIJ,
3017        MatIncreaseOverlap_MPIAIJ,
3018        MatGetValues_MPIAIJ,
3019        MatCopy_MPIAIJ,
3020 /*44*/ MatGetRowMax_MPIAIJ,
3021        MatScale_MPIAIJ,
3022        0,
3023        0,
3024        MatZeroRowsColumns_MPIAIJ,
3025 /*49*/ MatSetBlockSize_MPIAIJ,
3026        0,
3027        0,
3028        0,
3029        0,
3030 /*54*/ MatFDColoringCreate_MPIAIJ,
3031        0,
3032        MatSetUnfactored_MPIAIJ,
3033        MatPermute_MPIAIJ,
3034        0,
3035 /*59*/ MatGetSubMatrix_MPIAIJ,
3036        MatDestroy_MPIAIJ,
3037        MatView_MPIAIJ,
3038        0,
3039        0,
3040 /*64*/ 0,
3041        0,
3042        0,
3043        0,
3044        0,
3045 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3046        MatGetRowMinAbs_MPIAIJ,
3047        0,
3048        MatSetColoring_MPIAIJ,
3049 #if defined(PETSC_HAVE_ADIC)
3050        MatSetValuesAdic_MPIAIJ,
3051 #else
3052        0,
3053 #endif
3054        MatSetValuesAdifor_MPIAIJ,
3055 /*75*/ MatFDColoringApply_AIJ,
3056        0,
3057        0,
3058        0,
3059        0,
3060 /*80*/ 0,
3061        0,
3062        0,
3063 /*83*/ MatLoad_MPIAIJ,
3064        0,
3065        0,
3066        0,
3067        0,
3068        0,
3069 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3070        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3071        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3072        MatPtAP_Basic,
3073        MatPtAPSymbolic_MPIAIJ,
3074 /*94*/ MatPtAPNumeric_MPIAIJ,
3075        0,
3076        0,
3077        0,
3078        0,
3079 /*99*/ 0,
3080        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3081        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3082        MatConjugate_MPIAIJ,
3083        0,
3084 /*104*/MatSetValuesRow_MPIAIJ,
3085        MatRealPart_MPIAIJ,
3086        MatImaginaryPart_MPIAIJ,
3087        0,
3088        0,
3089 /*109*/0,
3090        MatGetRedundantMatrix_MPIAIJ,
3091        MatGetRowMin_MPIAIJ,
3092        0,
3093        0,
3094 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3095        0,
3096        0,
3097        0,
3098        0,
3099 /*119*/0,
3100        0,
3101        0,
3102        0,
3103        MatGetMultiProcBlock_MPIAIJ,
3104 /*124*/MatFindNonZeroRows_MPIAIJ,
3105        MatGetColumnNorms_MPIAIJ,
3106        MatInvertBlockDiagonal_MPIAIJ,
3107        0,
3108        MatGetSubMatricesParallel_MPIAIJ,
3109 /*129*/0,
3110        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3111        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3112        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3113        0,
3114 /*134*/0,
3115        0,
3116        0,
3117        0,
3118        0
3119 };
3120 
3121 /* ----------------------------------------------------------------------------------------*/
3122 
3123 EXTERN_C_BEGIN
3124 #undef __FUNCT__
3125 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3126 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3127 {
3128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3129   PetscErrorCode ierr;
3130 
3131   PetscFunctionBegin;
3132   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3133   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3134   PetscFunctionReturn(0);
3135 }
3136 EXTERN_C_END
3137 
3138 EXTERN_C_BEGIN
3139 #undef __FUNCT__
3140 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3141 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3142 {
3143   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3144   PetscErrorCode ierr;
3145 
3146   PetscFunctionBegin;
3147   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3148   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3149   PetscFunctionReturn(0);
3150 }
3151 EXTERN_C_END
3152 
3153 EXTERN_C_BEGIN
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3156 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3157 {
3158   Mat_MPIAIJ     *b;
3159   PetscErrorCode ierr;
3160   PetscInt       i;
3161 
3162   PetscFunctionBegin;
3163   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3164   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3165   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3166   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3167 
3168   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3169   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3170   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3171   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3172   if (d_nnz) {
3173     for (i=0; i<B->rmap->n; i++) {
3174       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3175     }
3176   }
3177   if (o_nnz) {
3178     for (i=0; i<B->rmap->n; i++) {
3179       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3180     }
3181   }
3182   b = (Mat_MPIAIJ*)B->data;
3183 
3184   if (!B->preallocated) {
3185     /* Explicitly create 2 MATSEQAIJ matrices. */
3186     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3187     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3188     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3189     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3190     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3191     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3192     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3193     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3194   }
3195 
3196   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3197   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3198   B->preallocated = PETSC_TRUE;
3199   ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3200   PetscFunctionReturn(0);
3201 }
3202 EXTERN_C_END
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3206 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3207 {
3208   Mat            mat;
3209   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3210   PetscErrorCode ierr;
3211 
3212   PetscFunctionBegin;
3213   *newmat       = 0;
3214   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3215   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3216   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3217   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3218   a    = (Mat_MPIAIJ*)mat->data;
3219 
3220   mat->factortype    = matin->factortype;
3221   mat->rmap->bs      = matin->rmap->bs;
3222   mat->assembled    = PETSC_TRUE;
3223   mat->insertmode   = NOT_SET_VALUES;
3224   mat->preallocated = PETSC_TRUE;
3225 
3226   a->size           = oldmat->size;
3227   a->rank           = oldmat->rank;
3228   a->donotstash     = oldmat->donotstash;
3229   a->roworiented    = oldmat->roworiented;
3230   a->rowindices     = 0;
3231   a->rowvalues      = 0;
3232   a->getrowactive   = PETSC_FALSE;
3233 
3234   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3235   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3236 
3237   if (oldmat->colmap) {
3238 #if defined (PETSC_USE_CTABLE)
3239     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3240 #else
3241     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3242     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3243     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3244 #endif
3245   } else a->colmap = 0;
3246   if (oldmat->garray) {
3247     PetscInt len;
3248     len  = oldmat->B->cmap->n;
3249     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3251     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3252   } else a->garray = 0;
3253 
3254   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3255   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3256   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3257   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3258   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3259   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3260   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3261   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3262   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3263   *newmat = mat;
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 
3268 
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatLoad_MPIAIJ"
3271 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3272 {
3273   PetscScalar    *vals,*svals;
3274   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3275   PetscErrorCode ierr;
3276   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3277   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3278   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3279   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3280   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3281   int            fd;
3282 
3283   PetscFunctionBegin;
3284   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3285   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3286   if (!rank) {
3287     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3288     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3289     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3290   }
3291 
3292   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3293 
3294   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3295   M = header[1]; N = header[2];
3296   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3297   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3298   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3299 
3300   /* If global sizes are set, check if they are consistent with that given in the file */
3301   if (sizesset) {
3302     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3303   }
3304   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3305   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3306 
3307   /* determine ownership of all rows */
3308   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3309   else m = newMat->rmap->n; /* Set by user */
3310 
3311   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3312   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3313 
3314   /* First process needs enough room for process with most rows */
3315   if (!rank) {
3316     mmax       = rowners[1];
3317     for (i=2; i<=size; i++) {
3318       mmax = PetscMax(mmax,rowners[i]);
3319     }
3320   } else mmax = m;
3321 
3322   rowners[0] = 0;
3323   for (i=2; i<=size; i++) {
3324     rowners[i] += rowners[i-1];
3325   }
3326   rstart = rowners[rank];
3327   rend   = rowners[rank+1];
3328 
3329   /* distribute row lengths to all processors */
3330   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3331   if (!rank) {
3332     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3333     ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3334     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3335     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3336     for (j=0; j<m; j++) {
3337       procsnz[0] += ourlens[j];
3338     }
3339     for (i=1; i<size; i++) {
3340       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3341       /* calculate the number of nonzeros on each processor */
3342       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3343         procsnz[i] += rowlengths[j];
3344       }
3345       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3346     }
3347     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3348   } else {
3349     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3350   }
3351 
3352   if (!rank) {
3353     /* determine max buffer needed and allocate it */
3354     maxnz = 0;
3355     for (i=0; i<size; i++) {
3356       maxnz = PetscMax(maxnz,procsnz[i]);
3357     }
3358     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3359 
3360     /* read in my part of the matrix column indices  */
3361     nz   = procsnz[0];
3362     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3363     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3364 
3365     /* read in everyone else's and ship off */
3366     for (i=1; i<size; i++) {
3367       nz     = procsnz[i];
3368       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3369       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3370     }
3371     ierr = PetscFree(cols);CHKERRQ(ierr);
3372   } else {
3373     /* determine buffer space needed for message */
3374     nz = 0;
3375     for (i=0; i<m; i++) {
3376       nz += ourlens[i];
3377     }
3378     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3379 
3380     /* receive message of column indices */
3381     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3382   }
3383 
3384   /* determine column ownership if matrix is not square */
3385   if (N != M) {
3386     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3387     else n = newMat->cmap->n;
3388     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3389     cstart = cend - n;
3390   } else {
3391     cstart = rstart;
3392     cend   = rend;
3393     n      = cend - cstart;
3394   }
3395 
3396   /* loop over local rows, determining number of off diagonal entries */
3397   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3398   jj = 0;
3399   for (i=0; i<m; i++) {
3400     for (j=0; j<ourlens[i]; j++) {
3401       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3402       jj++;
3403     }
3404   }
3405 
3406   for (i=0; i<m; i++) {
3407     ourlens[i] -= offlens[i];
3408   }
3409   if (!sizesset) {
3410     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3411   }
3412   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3413 
3414   for (i=0; i<m; i++) {
3415     ourlens[i] += offlens[i];
3416   }
3417 
3418   if (!rank) {
3419     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3420 
3421     /* read in my part of the matrix numerical values  */
3422     nz   = procsnz[0];
3423     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3424 
3425     /* insert into matrix */
3426     jj      = rstart;
3427     smycols = mycols;
3428     svals   = vals;
3429     for (i=0; i<m; i++) {
3430       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3431       smycols += ourlens[i];
3432       svals   += ourlens[i];
3433       jj++;
3434     }
3435 
3436     /* read in other processors and ship out */
3437     for (i=1; i<size; i++) {
3438       nz     = procsnz[i];
3439       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3440       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3441     }
3442     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3443   } else {
3444     /* receive numeric values */
3445     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3446 
3447     /* receive message of values */
3448     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3449 
3450     /* insert into matrix */
3451     jj      = rstart;
3452     smycols = mycols;
3453     svals   = vals;
3454     for (i=0; i<m; i++) {
3455       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3456       smycols += ourlens[i];
3457       svals   += ourlens[i];
3458       jj++;
3459     }
3460   }
3461   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3462   ierr = PetscFree(vals);CHKERRQ(ierr);
3463   ierr = PetscFree(mycols);CHKERRQ(ierr);
3464   ierr = PetscFree(rowners);CHKERRQ(ierr);
3465 
3466   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3467   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3468   PetscFunctionReturn(0);
3469 }
3470 
3471 #undef __FUNCT__
3472 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3473 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3474 {
3475   PetscErrorCode ierr;
3476   IS             iscol_local;
3477   PetscInt       csize;
3478 
3479   PetscFunctionBegin;
3480   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3481   if (call == MAT_REUSE_MATRIX) {
3482     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3483     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3484   } else {
3485     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3486   }
3487   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3488   if (call == MAT_INITIAL_MATRIX) {
3489     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3490     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3491   }
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3497 /*
3498     Not great since it makes two copies of the submatrix: first a SeqAIJ
3499   on each process, then the final result obtained by concatenating the local
3500   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3501 
3502   Note: This requires a sequential iscol with all indices.
3503 */
3504 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3505 {
3506   PetscErrorCode ierr;
3507   PetscMPIInt    rank,size;
3508   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3509   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3510   Mat            *local,M,Mreuse;
3511   MatScalar      *vwork,*aa;
3512   MPI_Comm       comm = ((PetscObject)mat)->comm;
3513   Mat_SeqAIJ     *aij;
3514 
3515 
3516   PetscFunctionBegin;
3517   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3518   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3519 
3520   if (call ==  MAT_REUSE_MATRIX) {
3521     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3522     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3523     local = &Mreuse;
3524     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3525   } else {
3526     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3527     Mreuse = *local;
3528     ierr   = PetscFree(local);CHKERRQ(ierr);
3529   }
3530 
3531   /*
3532       m - number of local rows
3533       n - number of columns (same on all processors)
3534       rstart - first row in new global matrix generated
3535   */
3536   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3537   if (call == MAT_INITIAL_MATRIX) {
3538     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3539     ii  = aij->i;
3540     jj  = aij->j;
3541 
3542     /*
3543         Determine the number of non-zeros in the diagonal and off-diagonal
3544         portions of the matrix in order to do correct preallocation
3545     */
3546 
3547     /* first get start and end of "diagonal" columns */
3548     if (csize == PETSC_DECIDE) {
3549       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3550       if (mglobal == n) { /* square matrix */
3551         nlocal = m;
3552       } else {
3553         nlocal = n/size + ((n % size) > rank);
3554       }
3555     } else {
3556       nlocal = csize;
3557     }
3558     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3559     rstart = rend - nlocal;
3560     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3561 
3562     /* next, compute all the lengths */
3563     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3564     olens = dlens + m;
3565     for (i=0; i<m; i++) {
3566       jend = ii[i+1] - ii[i];
3567       olen = 0;
3568       dlen = 0;
3569       for (j=0; j<jend; j++) {
3570         if (*jj < rstart || *jj >= rend) olen++;
3571         else dlen++;
3572         jj++;
3573       }
3574       olens[i] = olen;
3575       dlens[i] = dlen;
3576     }
3577     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3578     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3579     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3580     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3581     ierr = PetscFree(dlens);CHKERRQ(ierr);
3582   } else {
3583     PetscInt ml,nl;
3584 
3585     M = *newmat;
3586     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3587     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3588     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3589     /*
3590          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3591        rather than the slower MatSetValues().
3592     */
3593     M->was_assembled = PETSC_TRUE;
3594     M->assembled     = PETSC_FALSE;
3595   }
3596   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3597   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3598   ii  = aij->i;
3599   jj  = aij->j;
3600   aa  = aij->a;
3601   for (i=0; i<m; i++) {
3602     row   = rstart + i;
3603     nz    = ii[i+1] - ii[i];
3604     cwork = jj;     jj += nz;
3605     vwork = aa;     aa += nz;
3606     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3607   }
3608 
3609   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   *newmat = M;
3612 
3613   /* save submatrix used in processor for next request */
3614   if (call ==  MAT_INITIAL_MATRIX) {
3615     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3616     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3617   }
3618 
3619   PetscFunctionReturn(0);
3620 }
3621 
3622 EXTERN_C_BEGIN
3623 #undef __FUNCT__
3624 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3625 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3626 {
3627   PetscInt       m,cstart, cend,j,nnz,i,d;
3628   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3629   const PetscInt *JJ;
3630   PetscScalar    *values;
3631   PetscErrorCode ierr;
3632 
3633   PetscFunctionBegin;
3634   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3635 
3636   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3637   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3638   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3639   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3640   m      = B->rmap->n;
3641   cstart = B->cmap->rstart;
3642   cend   = B->cmap->rend;
3643   rstart = B->rmap->rstart;
3644 
3645   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3646 
3647 #if defined(PETSC_USE_DEBUG)
3648   for (i=0; i<m; i++) {
3649     nnz     = Ii[i+1]- Ii[i];
3650     JJ      = J + Ii[i];
3651     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3652     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3653     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3654   }
3655 #endif
3656 
3657   for (i=0; i<m; i++) {
3658     nnz     = Ii[i+1]- Ii[i];
3659     JJ      = J + Ii[i];
3660     nnz_max = PetscMax(nnz_max,nnz);
3661     d       = 0;
3662     for (j=0; j<nnz; j++) {
3663       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3664     }
3665     d_nnz[i] = d;
3666     o_nnz[i] = nnz - d;
3667   }
3668   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3669   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3670 
3671   if (v) values = (PetscScalar*)v;
3672   else {
3673     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3674     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3675   }
3676 
3677   for (i=0; i<m; i++) {
3678     ii   = i + rstart;
3679     nnz  = Ii[i+1]- Ii[i];
3680     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3681   }
3682   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3683   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684 
3685   if (!v) {
3686     ierr = PetscFree(values);CHKERRQ(ierr);
3687   }
3688   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3689   PetscFunctionReturn(0);
3690 }
3691 EXTERN_C_END
3692 
3693 #undef __FUNCT__
3694 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3695 /*@
3696    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3697    (the default parallel PETSc format).
3698 
3699    Collective on MPI_Comm
3700 
3701    Input Parameters:
3702 +  B - the matrix
3703 .  i - the indices into j for the start of each local row (starts with zero)
3704 .  j - the column indices for each local row (starts with zero)
3705 -  v - optional values in the matrix
3706 
3707    Level: developer
3708 
3709    Notes:
3710        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3711      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3712      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3713 
3714        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3715 
3716        The format used for the sparse matrix input is equivalent to a
3717     row-major ordering, i.e., for the following matrix the expected input data is
3718     as shown:
3719 
3720         1 0 0
3721         2 0 3     P0
3722        -------
3723         4 5 6     P1
3724 
3725      Process0 [P0]: rows_owned=[0,1]
3726         i =  {0,1,3}  [size = nrow+1  = 2+1]
3727         j =  {0,0,2}  [size = nz = 3]
3728         v =  {1,2,3}  [size = nz = 3]
3729 
3730      Process1 [P1]: rows_owned=[2]
3731         i =  {0,3}    [size = nrow+1  = 1+1]
3732         j =  {0,1,2}  [size = nz = 3]
3733         v =  {4,5,6}  [size = nz = 3]
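
   A rough usage sketch for the layout above (hypothetical names; each process
   passes only its own local CSR arrays, and the matrix is assembled on return):

.vb
     MatCreate(comm,&B);                          /* comm has two processes */
     MatSetSizes(B,nrow_local,PETSC_DECIDE,3,3);  /* nrow_local is 2 on P0, 1 on P1 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve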
3734 
3735 .keywords: matrix, aij, compressed row, sparse, parallel
3736 
3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3738           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3739 @*/
3740 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3741 {
3742   PetscErrorCode ierr;
3743 
3744   PetscFunctionBegin;
3745   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3746   PetscFunctionReturn(0);
3747 }
3748 
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3751 /*@C
3752    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3753    (the default parallel PETSc format).  For good matrix assembly performance
3754    the user should preallocate the matrix storage by setting the parameters
3755    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3756    performance can be increased by more than a factor of 50.
3757 
3758    Collective on MPI_Comm
3759 
3760    Input Parameters:
3761 +  A - the matrix
3762 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3763            (same value is used for all local rows)
3764 .  d_nnz - array containing the number of nonzeros in the various rows of the
3765            DIAGONAL portion of the local submatrix (possibly different for each row)
3766            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3767            The size of this array is equal to the number of local rows, i.e 'm'.
3768            You must leave room for the diagonal entry even if it is zero.
3769 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3770            submatrix (same value is used for all local rows).
3771 -  o_nnz - array containing the number of nonzeros in the various rows of the
3772            OFF-DIAGONAL portion of the local submatrix (possibly different for
3773            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3774            structure. The size of this array is equal to the number
3775            of local rows, i.e 'm'.
3776 
3777    If the *_nnz parameter is given then the *_nz parameter is ignored
3778 
3779    The AIJ format (also called the Yale sparse matrix format or
3780    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3781    storage.  The stored row and column indices begin with zero.
3782    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3783 
3784    The parallel matrix is partitioned such that the first m0 rows belong to
3785    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3786    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3787 
3788    The DIAGONAL portion of the local submatrix of a processor can be defined
3789    as the submatrix obtained by extracting the part corresponding to
3790    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3791    first row that belongs to the processor, r2 is the last row belonging to
3792    this processor, and c1-c2 is the range of indices of the local part of a
3793    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3794    common case of a square matrix, the row and column ranges are the same and
3795    the DIAGONAL part is also square. The remaining portion of the local
3796    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3797 
3798    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3799 
3800    You can call MatGetInfo() to get information on how effective the preallocation was;
3801    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3802    You can also run with the option -info and look for messages with the string
3803    malloc in them to see if additional memory allocation was needed.
3804 
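   For instance, a minimal check of the preallocation (a sketch; error checking
   omitted) might look like:

.vb
     MatInfo info;
     MatGetInfo(A,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_unneeded %g\n",info.mallocs,info.nz_unneeded);
.ve
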
3805    Example usage:
3806 
3807    Consider the following 8x8 matrix with 34 non-zero values, that is
3808    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3809    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3810    as follows:
3811 
3812 .vb
3813             1  2  0  |  0  3  0  |  0  4
3814     Proc0   0  5  6  |  7  0  0  |  8  0
3815             9  0 10  | 11  0  0  | 12  0
3816     -------------------------------------
3817            13  0 14  | 15 16 17  |  0  0
3818     Proc1   0 18  0  | 19 20 21  |  0  0
3819             0  0  0  | 22 23  0  | 24  0
3820     -------------------------------------
3821     Proc2  25 26 27  |  0  0 28  | 29  0
3822            30  0  0  | 31 32 33  |  0 34
3823 .ve
3824 
3825    This can be represented as a collection of submatrices as:
3826 
3827 .vb
3828       A B C
3829       D E F
3830       G H I
3831 .ve
3832 
3833    Where the submatrices A,B,C are owned by proc0, D,E,F are
3834    owned by proc1, G,H,I are owned by proc2.
3835 
3836    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3837    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3838    The 'M','N' parameters are 8,8, and have the same values on all procs.
3839 
3840    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3841    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3842    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3843    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3844    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3845    matrix, and [DF] as another SeqAIJ matrix.
3846 
3847    When d_nz, o_nz parameters are specified, d_nz storage elements are
3848    allocated for every row of the local diagonal submatrix, and o_nz
3849    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3850    One way to choose d_nz and o_nz is to use the max nonzeros per local
3851    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3852    In this case, the values of d_nz,o_nz are:
3853 .vb
3854      proc0 : d_nz = 2, o_nz = 2
3855      proc1 : d_nz = 3, o_nz = 2
3856      proc2 : d_nz = 1, o_nz = 4
3857 .ve
3858    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3859    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3860    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3861    34 values.
3862 
3863    When d_nnz, o_nnz parameters are specified, the storage is specified
3864    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3865    In the above case the values for d_nnz,o_nnz are:
3866 .vb
3867      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3868      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3869      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3870 .ve
3871    Here the space allocated is the sum of all the above values, i.e. 34, and
3872    hence pre-allocation is perfect.
3873 
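   As a sketch, proc1 in the example above (assuming B has already been created
   on this communicator with MatCreate(), MatSetType(B,MATMPIAIJ) and local row
   size 3) would preallocate with:

.vb
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
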
3874    Level: intermediate
3875 
3876 .keywords: matrix, aij, compressed row, sparse, parallel
3877 
3878 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3879           MPIAIJ, MatGetInfo()
3880 @*/
3881 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3882 {
3883   PetscErrorCode ierr;
3884 
3885   PetscFunctionBegin;
3886   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3887   PetscValidType(B,1);
3888   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3889   PetscFunctionReturn(0);
3890 }
3891 
3892 #undef __FUNCT__
3893 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3894 /*@
3895      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3896          rows in standard CSR format.
3897 
3898    Collective on MPI_Comm
3899 
3900    Input Parameters:
3901 +  comm - MPI communicator
3902 .  m - number of local rows (Cannot be PETSC_DECIDE)
3903 .  n - This value should be the same as the local size used in creating the
3904        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3905        calculated if N is given) For square matrices n is almost always m.
3906 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3907 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3908 .   i - row indices
3909 .   j - column indices
3910 -   a - matrix values
3911 
3912    Output Parameter:
3913 .   mat - the matrix
3914 
3915    Level: intermediate
3916 
3917    Notes:
3918        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3919      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3920      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3921 
3922        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3923 
3924        The format used for the sparse matrix input is equivalent to a
3925     row-major ordering, i.e., for the following matrix the expected input data is
3926     as shown:
3927 
3928         1 0 0
3929         2 0 3     P0
3930        -------
3931         4 5 6     P1
3932 
3933      Process0 [P0]: rows_owned=[0,1]
3934         i =  {0,1,3}  [size = nrow+1  = 2+1]
3935         j =  {0,0,2}  [size = nz = 3]
3936         v =  {1,2,3}  [size = nz = 3]
3937 
3938      Process1 [P1]: rows_owned=[2]
3939         i =  {0,3}    [size = nrow+1  = 1+1]
3940         j =  {0,1,2}  [size = nz = 3]
3941         v =  {4,5,6}  [size = nz = 3]
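
   A sketch of the corresponding call (hypothetical names; m is 2 on P0 and 1 on
   P1, and each process passes its own local i, j, and a arrays):

.vb
     MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,3,3,i,j,a,&A);
.ve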
3942 
3943 .keywords: matrix, aij, compressed row, sparse, parallel
3944 
3945 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3946           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3947 @*/
3948 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3949 {
3950   PetscErrorCode ierr;
3951 
3952   PetscFunctionBegin;
3953   if (i[0]) {
3954     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3955   }
3956   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3957   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3958   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3959   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3960   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3961   PetscFunctionReturn(0);
3962 }
3963 
3964 #undef __FUNCT__
3965 #define __FUNCT__ "MatCreateMPIAIJ"
3966 /*@C
3967    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3968    (the default parallel PETSc format).  For good matrix assembly performance
3969    the user should preallocate the matrix storage by setting the parameters
3970    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3971    performance can be increased by more than a factor of 50.
3972 
3973    Collective on MPI_Comm
3974 
3975    Input Parameters:
3976 +  comm - MPI communicator
3977 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3978            This value should be the same as the local size used in creating the
3979            y vector for the matrix-vector product y = Ax.
3980 .  n - This value should be the same as the local size used in creating the
3981        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3982        calculated if N is given) For square matrices n is almost always m.
3983 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3984 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3985 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3986            (same value is used for all local rows)
3987 .  d_nnz - array containing the number of nonzeros in the various rows of the
3988            DIAGONAL portion of the local submatrix (possibly different for each row)
3989            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3990            The size of this array is equal to the number of local rows, i.e 'm'.
3991            You must leave room for the diagonal entry even if it is zero.
3992 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3993            submatrix (same value is used for all local rows).
3994 -  o_nnz - array containing the number of nonzeros in the various rows of the
3995            OFF-DIAGONAL portion of the local submatrix (possibly different for
3996            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3997            structure. The size of this array is equal to the number
3998            of local rows, i.e 'm'.
3999 
4000    Output Parameter:
4001 .  A - the matrix
4002 
4003    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4004    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4005    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4006 
4007    Notes:
4008    If the *_nnz parameter is given then the *_nz parameter is ignored
4009 
4010    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4011    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4012    storage requirements for this matrix.
4013 
4014    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4015    processor then it must be used on all processors that share the object for
4016    that argument.
4017 
4018    The user MUST specify either the local or global matrix dimensions
4019    (possibly both).
4020 
4021    The parallel matrix is partitioned across processors such that the
4022    first m0 rows belong to process 0, the next m1 rows belong to
4023    process 1, the next m2 rows belong to process 2, etc., where
4024    m0,m1,m2,... are the input parameter 'm'; i.e., each processor stores
4025    values corresponding to an [m x N] submatrix.
4026 
4027    The columns are logically partitioned with the n0 columns belonging
4028    to the 0th partition, the next n1 columns belonging to the next
4029    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4030 
4031    The DIAGONAL portion of the local submatrix on any given processor
4032    is the submatrix corresponding to the rows and columns m,n
4033    owned by the given processor; i.e., the diagonal submatrix on
4034    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4035    etc. The remaining portion of the local submatrix, [m x (N-n)],
4036    constitutes the OFF-DIAGONAL portion. The example below
4037    illustrates this concept.
4038 
4039    For a square global matrix we define each processor's diagonal portion
4040    to be its local rows and the corresponding columns (a square submatrix);
4041    each processor's off-diagonal portion encompasses the remainder of the
4042    local matrix (a rectangular submatrix).
4043 
4044    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4045 
4046    When calling this routine with a single process communicator, a matrix of
4047    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4048    type of communicator, use the construction mechanism:
4049      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4050 
4051    By default, this format uses inodes (identical nodes) when possible.
4052    We search for consecutive rows with the same nonzero structure, thereby
4053    reusing matrix information to achieve increased efficiency.
4054 
4055    Options Database Keys:
4056 +  -mat_no_inode  - Do not use inodes
4057 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4058 -  -mat_aij_oneindex - Internally use indexing starting at 1
4059         rather than 0.  Note that when calling MatSetValues(),
4060         the user still MUST index entries starting at 0!
4061 
4062 
4063    Example usage:
4064 
4065    Consider the following 8x8 matrix with 34 non-zero values that is
4066    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4067    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4068    as follows:
4069 
4070 .vb
4071             1  2  0  |  0  3  0  |  0  4
4072     Proc0   0  5  6  |  7  0  0  |  8  0
4073             9  0 10  | 11  0  0  | 12  0
4074     -------------------------------------
4075            13  0 14  | 15 16 17  |  0  0
4076     Proc1   0 18  0  | 19 20 21  |  0  0
4077             0  0  0  | 22 23  0  | 24  0
4078     -------------------------------------
4079     Proc2  25 26 27  |  0  0 28  | 29  0
4080            30  0  0  | 31 32 33  |  0 34
4081 .ve
4082 
4083    This can be represented as a collection of submatrices as:
4084 
4085 .vb
4086       A B C
4087       D E F
4088       G H I
4089 .ve
4090 
4091    where the submatrices A,B,C are owned by proc0, D,E,F are
4092    owned by proc1, G,H,I are owned by proc2.
4093 
4094    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4095    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'M','N' parameters are 8,8, and have the same values on all procs.
4097 
4098    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4099    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4100    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4101    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4102    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4103    matrix, and [DF] as another SeqAIJ matrix.
4104 
4105    When d_nz, o_nz parameters are specified, d_nz storage elements are
4106    allocated for every row of the local diagonal submatrix, and o_nz
4107    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4108    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4109    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4110    In this case, the values of d_nz,o_nz are:
4111 .vb
4112      proc0 : d_nz = 2, o_nz = 2
4113      proc1 : d_nz = 3, o_nz = 2
4114      proc2 : d_nz = 1, o_nz = 4
4115 .ve
4116    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
4117    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4118    for proc2; i.e., we are using 12+15+10=37 storage locations to store
4119    34 values.
4120 
4121    When the d_nnz, o_nnz parameters are specified, the storage is specified
4122    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4123    In the above case the values for d_nnz,o_nnz are:
4124 .vb
4125      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4126      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4127      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4128 .ve
4129    Here the space allocated is the sum of all the above values, i.e., 34, and
4130    hence the preallocation is perfect.
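
   As a sketch, proc0 of the example above could then create its share of the
   matrix with (illustrative variable names; error checking omitted):
.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve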
4131 
4132    Level: intermediate
4133 
4134 .keywords: matrix, aij, compressed row, sparse, parallel
4135 
4136 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4137           MPIAIJ, MatCreateMPIAIJWithArrays()
4138 @*/
4139 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4140 {
4141   PetscErrorCode ierr;
4142   PetscMPIInt    size;
4143 
4144   PetscFunctionBegin;
4145   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4146   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4147   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4148   if (size > 1) {
4149     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4150     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4151   } else {
4152     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4153     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4154   }
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 #undef __FUNCT__
4159 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
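/* Returns the diagonal block (Ad), the off-diagonal block (Ao), and the mapping
   (colmap) from Ao's local column indices to global column indices */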
4160 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4161 {
4162   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4163 
4164   PetscFunctionBegin;
4165   *Ad     = a->A;
4166   *Ao     = a->B;
4167   *colmap = a->garray;
4168   PetscFunctionReturn(0);
4169 }
4170 
4171 #undef __FUNCT__
4172 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4173 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4174 {
4175   PetscErrorCode ierr;
4176   PetscInt       i;
4177   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4178 
4179   PetscFunctionBegin;
4180   if (coloring->ctype == IS_COLORING_GLOBAL) {
4181     ISColoringValue *allcolors,*colors;
4182     ISColoring      ocoloring;
4183 
4184     /* set coloring for diagonal portion */
4185     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4186 
4187     /* set coloring for off-diagonal portion */
4188     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4189     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4190     for (i=0; i<a->B->cmap->n; i++) {
4191       colors[i] = allcolors[a->garray[i]];
4192     }
4193     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4194     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4195     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4196     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4197   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4198     ISColoringValue *colors;
4199     PetscInt        *larray;
4200     ISColoring      ocoloring;
4201 
4202     /* set coloring for diagonal portion */
4203     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4204     for (i=0; i<a->A->cmap->n; i++) {
4205       larray[i] = i + A->cmap->rstart;
4206     }
4207     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4208     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4209     for (i=0; i<a->A->cmap->n; i++) {
4210       colors[i] = coloring->colors[larray[i]];
4211     }
4212     ierr = PetscFree(larray);CHKERRQ(ierr);
4213     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4214     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4215     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4216 
4217     /* set coloring for off-diagonal portion */
4218     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4219     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4220     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4221     for (i=0; i<a->B->cmap->n; i++) {
4222       colors[i] = coloring->colors[larray[i]];
4223     }
4224     ierr = PetscFree(larray);CHKERRQ(ierr);
4225     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4226     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4227     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4228   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4229 
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 #if defined(PETSC_HAVE_ADIC)
4234 #undef __FUNCT__
4235 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4236 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4237 {
4238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4239   PetscErrorCode ierr;
4240 
4241   PetscFunctionBegin;
4242   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4243   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4244   PetscFunctionReturn(0);
4245 }
4246 #endif
4247 
4248 #undef __FUNCT__
4249 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4250 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4251 {
4252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4253   PetscErrorCode ierr;
4254 
4255   PetscFunctionBegin;
4256   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4257   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4258   PetscFunctionReturn(0);
4259 }
4260 
4261 #undef __FUNCT__
4262 #define __FUNCT__ "MatMergeSymbolic"
4263 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4264 {
4265   PetscErrorCode ierr;
4266   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4267   PetscInt       *indx;
4268 
4269   PetscFunctionBegin;
4270   /* This routine will ONLY return MPIAIJ type matrix */
4271   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4272   if (n == PETSC_DECIDE){
4273     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4274   }
4275   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4276   rstart -= m;
4277 
4278   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4279   for (i=0;i<m;i++) {
4280     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4281     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4282     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4283   }
4284 
4285   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4286   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4287   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4288   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4289   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4290   PetscFunctionReturn(0);
4291 }
4292 
4293 #undef __FUNCT__
4294 #define __FUNCT__ "MatMergeNumeric"
4295 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4296 {
4297   PetscErrorCode ierr;
4298   PetscInt       m,N,i,rstart,nnz,Ii;
4299   PetscInt       *indx;
4300   PetscScalar    *values;
4301 
4302   PetscFunctionBegin;
4303   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4304   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4305   for (i=0;i<m;i++) {
4306     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4307     Ii    = i + rstart;
4308     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4309     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4310   }
4311   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4312   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4313   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 #undef __FUNCT__
4318 #define __FUNCT__ "MatMerge"
4319 /*@
4320       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4321                  matrices from each processor
4322 
4323     Collective on MPI_Comm
4324 
4325    Input Parameters:
4326 +    comm - the communicator the parallel matrix will live on
4327 .    inmat - the input sequential matrix on each processor
4328 .    n - number of local columns (or PETSC_DECIDE)
4329 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4330 
4331    Output Parameter:
4332 .    outmat - the parallel matrix generated
4333 
4334     Level: advanced
4335 
4336    Notes: The number of columns of the matrix in EACH processor MUST be the same.
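
   A usage sketch (each process passes its own sequential matrix; note that the
   numeric phase destroys the input matrix inmat):
.vb
     Mat outmat;
     MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve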
4337 
4338 @*/
4339 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4340 {
4341   PetscErrorCode ierr;
4342 
4343   PetscFunctionBegin;
4344   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4345   if (scall == MAT_INITIAL_MATRIX){
4346     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4347   }
4348   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4349   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4350   PetscFunctionReturn(0);
4351 }
4352 
4353 #undef __FUNCT__
4354 #define __FUNCT__ "MatFileSplit"
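/* Writes each process's local rows of A, as a sequential matrix, to the binary
   file "<outfile>.<rank>" */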
4355 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4356 {
4357   PetscErrorCode    ierr;
4358   PetscMPIInt       rank;
4359   PetscInt          m,N,i,rstart,nnz;
4360   size_t            len;
4361   const PetscInt    *indx;
4362   PetscViewer       out;
4363   char              *name;
4364   Mat               B;
4365   const PetscScalar *values;
4366 
4367   PetscFunctionBegin;
4368   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4369   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4370   /* Should this be the type of the diagonal block of A? */
4371   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4372   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4373   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4374   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4375   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4376   for (i=0;i<m;i++) {
4377     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4378     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4379     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4380   }
4381   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4382   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383 
4384   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4385   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4386   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for the ".<rank>" suffix */
4387   sprintf(name,"%s.%d",outfile,rank);
4388   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4389   ierr = PetscFree(name);CHKERRQ(ierr);
4390   ierr = MatView(B,out);CHKERRQ(ierr);
4391   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4392   ierr = MatDestroy(&B);CHKERRQ(ierr);
4393   PetscFunctionReturn(0);
4394 }
4395 
4396 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4397 #undef __FUNCT__
4398 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4399 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4400 {
4401   PetscErrorCode       ierr;
4402   Mat_Merge_SeqsToMPI  *merge;
4403   PetscContainer       container;
4404 
4405   PetscFunctionBegin;
4406   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4407   if (container) {
4408     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4409     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4410     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4411     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4412     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4421     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4422     ierr = PetscFree(merge);CHKERRQ(ierr);
4423     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4424   }
4425   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 #include <../src/mat/utils/freespace.h>
4430 #include <petscbt.h>
4431 
4432 #undef __FUNCT__
4433 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4434 /*@C
4435       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4436                  matrices from each processor
4437 
4438     Collective on MPI_Comm
4439 
4440    Input Parameters:
4441 +    comm - the communicator the parallel matrix will live on
4442 .    seqmat - the input sequential matrix on each processor
4443 .    m - number of local rows (or PETSC_DECIDE)
4444 .    n - number of local columns (or PETSC_DECIDE)
4445 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4446 
4447    Output Parameter:
4448 .    mpimat - the parallel matrix generated
4449 
4450     Level: advanced
4451 
4452    Notes:
4453      The dimensions of the sequential matrix in each processor MUST be the same.
4454      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4455      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
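
     A usage sketch via the driver MatMerge_SeqsToMPI(), which runs the symbolic
     phase for MAT_INITIAL_MATRIX and then this numeric phase:
.vb
       Mat mpimat;
       MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve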
4456 @*/
4457 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4458 {
4459   PetscErrorCode       ierr;
4460   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4461   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4462   PetscMPIInt          size,rank,taga,*len_s;
4463   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4464   PetscInt             proc,m;
4465   PetscInt             **buf_ri,**buf_rj;
4466   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4467   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4468   MPI_Request          *s_waits,*r_waits;
4469   MPI_Status           *status;
4470   MatScalar            *aa=a->a;
4471   MatScalar            **abuf_r,*ba_i;
4472   Mat_Merge_SeqsToMPI  *merge;
4473   PetscContainer       container;
4474 
4475   PetscFunctionBegin;
4476   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4477 
4478   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4479   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4480 
4481   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4482   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4483 
4484   bi     = merge->bi;
4485   bj     = merge->bj;
4486   buf_ri = merge->buf_ri;
4487   buf_rj = merge->buf_rj;
4488 
4489   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4490   owners = merge->rowmap->range;
4491   len_s  = merge->len_s;
4492 
4493   /* send and recv matrix values */
4494   /*-----------------------------*/
4495   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4496   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4497 
4498   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4499   for (proc=0,k=0; proc<size; proc++){
4500     if (!len_s[proc]) continue;
4501     i = owners[proc];
4502     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4503     k++;
4504   }
4505 
4506   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4507   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4508   ierr = PetscFree(status);CHKERRQ(ierr);
4509 
4510   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4511   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4512 
4513   /* insert mat values of mpimat */
4514   /*----------------------------*/
4515   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4516   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4517 
4518   for (k=0; k<merge->nrecv; k++){
4519     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4520     nrows = *(buf_ri_k[k]);
4521     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4522     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure entries of the k-th received message */
4523   }
4524 
4525   /* set values of ba */
4526   m = merge->rowmap->n;
4527   for (i=0; i<m; i++) {
4528     arow = owners[rank] + i;
4529     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4530     bnzi = bi[i+1] - bi[i];
4531     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4532 
4533     /* add local non-zero vals of this proc's seqmat into ba */
4534     anzi = ai[arow+1] - ai[arow];
4535     aj   = a->j + ai[arow];
4536     aa   = a->a + ai[arow];
4537     nextaj = 0;
4538     for (j=0; nextaj<anzi; j++){
4539       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4540         ba_i[j] += aa[nextaj++];
4541       }
4542     }
4543 
4544     /* add received vals into ba */
4545     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4546       /* i-th row */
4547       if (i == *nextrow[k]) {
4548         anzi = *(nextai[k]+1) - *nextai[k];
4549         aj   = buf_rj[k] + *(nextai[k]);
4550         aa   = abuf_r[k] + *(nextai[k]);
4551         nextaj = 0;
4552         for (j=0; nextaj<anzi; j++){
4553           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4554             ba_i[j] += aa[nextaj++];
4555           }
4556         }
4557         nextrow[k]++; nextai[k]++;
4558       }
4559     }
4560     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4561   }
4562   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4563   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4564 
4565   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4566   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4567   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4568   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4569   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4570   PetscFunctionReturn(0);
4571 }
4572 
4573 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4574 
4575 #undef __FUNCT__
4576 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
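/* Builds the nonzero structure of the merged parallel matrix and attaches the
   supporting Mat_Merge_SeqsToMPI struct (in the "MatMergeSeqsToMPI" container)
   for later use by MatMerge_SeqsToMPINumeric() */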
4577 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4578 {
4579   PetscErrorCode       ierr;
4580   Mat                  B_mpi;
4581   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4582   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4583   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4584   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4585   PetscInt             len,proc,*dnz,*onz;
4586   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4587   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4588   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4589   MPI_Status           *status;
4590   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4591   PetscBT              lnkbt;
4592   Mat_Merge_SeqsToMPI  *merge;
4593   PetscContainer       container;
4594 
4595   PetscFunctionBegin;
4596   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4597 
4598   /* make sure it is a PETSc comm */
4599   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4601   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4602 
4603   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4604   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4605 
4606   /* determine row ownership */
4607   /*---------------------------------------------------------*/
4608   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4612   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4613   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4614   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4615 
4616   m      = merge->rowmap->n;
4617   M      = merge->rowmap->N;
4618   owners = merge->rowmap->range;
4619 
4620   /* determine the number of messages to send, their lengths */
4621   /*---------------------------------------------------------*/
4622   len_s  = merge->len_s;
4623 
4624   len = 0;  /* length of buf_si[] */
4625   merge->nsend = 0;
4626   for (proc=0; proc<size; proc++){
4627     len_si[proc] = 0;
4628     if (proc == rank){
4629       len_s[proc] = 0;
4630     } else {
4631       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4632       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4633     }
4634     if (len_s[proc]) {
4635       merge->nsend++;
4636       nrows = 0;
4637       for (i=owners[proc]; i<owners[proc+1]; i++){
4638         if (ai[i+1] > ai[i]) nrows++;
4639       }
4640       len_si[proc] = 2*(nrows+1);
4641       len += len_si[proc];
4642     }
4643   }
4644 
4645   /* determine the number and length of messages to receive for ij-structure */
4646   /*-------------------------------------------------------------------------*/
4647   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4648   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4649 
4650   /* post the Irecv of j-structure */
4651   /*-------------------------------*/
4652   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4653   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4654 
4655   /* post the Isend of j-structure */
4656   /*--------------------------------*/
4657   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4658 
4659   for (proc=0, k=0; proc<size; proc++){
4660     if (!len_s[proc]) continue;
4661     i = owners[proc];
4662     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4663     k++;
4664   }
4665 
4666   /* receives and sends of j-structure are complete */
4667   /*------------------------------------------------*/
4668   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4669   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4670 
4671   /* send and recv i-structure */
4672   /*---------------------------*/
4673   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4674   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4675 
4676   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4677   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4678   for (proc=0,k=0; proc<size; proc++){
4679     if (!len_s[proc]) continue;
4680     /* form outgoing message for i-structure:
4681          buf_si[0]:                 nrows to be sent
4682                [1:nrows]:           row index (global)
4683                [nrows+1:2*nrows+1]: i-structure index
4684     */
4685     /*-------------------------------------------*/
4686     nrows = len_si[proc]/2 - 1;
4687     buf_si_i    = buf_si + nrows+1;
4688     buf_si[0]   = nrows;
4689     buf_si_i[0] = 0;
4690     nrows = 0;
4691     for (i=owners[proc]; i<owners[proc+1]; i++){
4692       anzi = ai[i+1] - ai[i];
4693       if (anzi) {
4694         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4695         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4696         nrows++;
4697       }
4698     }
4699     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4700     k++;
4701     buf_si += len_si[proc];
4702   }
4703 
4704   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4705   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4706 
4707   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4708   for (i=0; i<merge->nrecv; i++){
4709     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4710   }
4711 
4712   ierr = PetscFree(len_si);CHKERRQ(ierr);
4713   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4714   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4715   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4716   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4717   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4718   ierr = PetscFree(status);CHKERRQ(ierr);
4719 
4720   /* compute a local seq matrix in each processor */
4721   /*----------------------------------------------*/
4722   /* allocate bi array and free space for accumulating nonzero column info */
4723   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4724   bi[0] = 0;
4725 
4726   /* create and initialize a linked list */
4727   nlnk = N+1;
4728   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4729 
4730   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4732   len  = ai[owners[rank+1]] - ai[owners[rank]]; /* num of local nonzeros of seqmat */
4733   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4734   current_space = free_space;
4735 
4736   /* determine symbolic info for each local row */
4737   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4738 
4739   for (k=0; k<merge->nrecv; k++){
4740     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4741     nrows = *buf_ri_k[k];
4742     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4743     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure entries of the k-th received message */
4744   }
4745 
4746   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4747   len = 0;
4748   for (i=0;i<m;i++) {
4749     bnzi   = 0;
4750     /* add local non-zero cols of this proc's seqmat into lnk */
4751     arow   = owners[rank] + i;
4752     anzi   = ai[arow+1] - ai[arow];
4753     aj     = a->j + ai[arow];
4754     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4755     bnzi += nlnk;
4756     /* add received col data into lnk */
4757     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4758       if (i == *nextrow[k]) { /* i-th row */
4759         anzi = *(nextai[k]+1) - *nextai[k];
4760         aj   = buf_rj[k] + *nextai[k];
4761         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4762         bnzi += nlnk;
4763         nextrow[k]++; nextai[k]++;
4764       }
4765     }
4766     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4767 
4768     /* if free space is not available, make more free space */
4769     if (current_space->local_remaining<bnzi) {
4770       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4771       nspacedouble++;
4772     }
4773     /* copy data into free space, then initialize lnk */
4774     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4775     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4776 
4777     current_space->array           += bnzi;
4778     current_space->local_used      += bnzi;
4779     current_space->local_remaining -= bnzi;
4780 
4781     bi[i+1] = bi[i] + bnzi;
4782   }
4783 
4784   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4785 
4786   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4787   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4788   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4789 
4790   /* create symbolic parallel matrix B_mpi */
4791   /*---------------------------------------*/
4792   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4793   if (n==PETSC_DECIDE) {
4794     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4795   } else {
4796     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4797   }
4798   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4799   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4800   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4801 
4802   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4803   B_mpi->assembled     = PETSC_FALSE;
4804   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4805   merge->bi            = bi;
4806   merge->bj            = bj;
4807   merge->buf_ri        = buf_ri;
4808   merge->buf_rj        = buf_rj;
4809   merge->coi           = PETSC_NULL;
4810   merge->coj           = PETSC_NULL;
4811   merge->owners_co     = PETSC_NULL;
4812 
4813   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4814 
4815   /* attach the supporting struct to B_mpi for reuse */
4816   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4817   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4818   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4819   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4820   *mpimat = B_mpi;
4821 
4822   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4823   PetscFunctionReturn(0);
4824 }
4825 
4826 #undef __FUNCT__
4827 #define __FUNCT__ "MatMerge_SeqsToMPI"
4828 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4829 {
4830   PetscErrorCode   ierr;
4831 
4832   PetscFunctionBegin;
4833   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4834   if (scall == MAT_INITIAL_MATRIX){
4835     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4836   }
4837   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4838   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4839   PetscFunctionReturn(0);
4840 }
4841 
4842 #undef __FUNCT__
4843 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4844 /*@
4845      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4846           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4847           with MatGetSize()
4848 
4849     Not Collective
4850 
4851    Input Parameters:
4852 +    A - the matrix
4853 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4854 
4855    Output Parameter:
4856 .    A_loc - the local sequential matrix generated
4857 
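    Example usage (a sketch):
.vb
      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      /* ... use A_loc ... */
      MatDestroy(&A_loc);
.ve
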
4858     Level: developer
4859 
4860 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4861 
4862 @*/
4863 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4864 {
4865   PetscErrorCode  ierr;
4866   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4867   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4868   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4869   MatScalar       *aa=a->a,*ba=b->a,*cam;
4870   PetscScalar     *ca;
4871   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4872   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4873   PetscBool       match;
4874 
4875   PetscFunctionBegin;
4876   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4877   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4878   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4879   if (scall == MAT_INITIAL_MATRIX){
4880     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4881     ci[0] = 0;
4882     for (i=0; i<am; i++){
4883       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4884     }
4885     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4886     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4887     k = 0;
4888     for (i=0; i<am; i++) {
4889       ncols_o = bi[i+1] - bi[i];
4890       ncols_d = ai[i+1] - ai[i];
4891       /* off-diagonal portion of A */
4892       for (jo=0; jo<ncols_o; jo++) {
4893         col = cmap[*bj];
4894         if (col >= cstart) break;
4895         cj[k]   = col; bj++;
4896         ca[k++] = *ba++;
4897       }
4898       /* diagonal portion of A */
4899       for (j=0; j<ncols_d; j++) {
4900         cj[k]   = cstart + *aj++;
4901         ca[k++] = *aa++;
4902       }
4903       /* off-diagonal portion of A */
4904       for (j=jo; j<ncols_o; j++) {
4905         cj[k]   = cmap[*bj++];
4906         ca[k++] = *ba++;
4907       }
4908     }
4909     /* put together the new matrix */
4910     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4911     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4912     /* Since these are PETSc arrays, change flags to free them as necessary. */
4913     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4914     mat->free_a  = PETSC_TRUE;
4915     mat->free_ij = PETSC_TRUE;
4916     mat->nonew   = 0;
4917   } else if (scall == MAT_REUSE_MATRIX){
4918     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4919     ci = mat->i; cj = mat->j; cam = mat->a;
4920     for (i=0; i<am; i++) {
4921       /* off-diagonal portion of A */
4922       ncols_o = bi[i+1] - bi[i];
4923       for (jo=0; jo<ncols_o; jo++) {
4924         col = cmap[*bj];
4925         if (col >= cstart) break;
4926         *cam++ = *ba++; bj++;
4927       }
4928       /* diagonal portion of A */
4929       ncols_d = ai[i+1] - ai[i];
4930       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4931       /* off-diagonal portion of A */
4932       for (j=jo; j<ncols_o; j++) {
4933         *cam++ = *ba++; bj++;
4934       }
4935     }
4936   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4937   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4943 /*@C
4944      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4945 
4946     Not Collective
4947 
4948    Input Parameters:
4949 +    A - the matrix
4950 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4951 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4952 
4953    Output Parameter:
4954 .    A_loc - the local sequential matrix generated
4955 
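    Example usage (a sketch; passing PETSC_NULL selects all local rows and all
    nonzero columns):
.vb
      Mat A_loc;
      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
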
4956     Level: developer
4957 
4958 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4959 
4960 @*/
4961 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4962 {
4963   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4964   PetscErrorCode    ierr;
4965   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4966   IS                isrowa,iscola;
4967   Mat               *aloc;
4968   PetscBool       match;
4969 
4970   PetscFunctionBegin;
4971   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4972   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4973   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4974   if (!row){
4975     start = A->rmap->rstart; end = A->rmap->rend;
4976     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4977   } else {
4978     isrowa = *row;
4979   }
4980   if (!col){
4981     start = A->cmap->rstart;
4982     cmap  = a->garray;
4983     nzA   = a->A->cmap->n;
4984     nzB   = a->B->cmap->n;
4985     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4986     ncols = 0;
4987     for (i=0; i<nzB; i++) {
4988       if (cmap[i] < start) idx[ncols++] = cmap[i];
4989       else break;
4990     }
4991     imark = i;
4992     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4993     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4994     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4995   } else {
4996     iscola = *col;
4997   }
4998   if (scall != MAT_INITIAL_MATRIX){
4999     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5000     aloc[0] = *A_loc;
5001   }
5002   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5003   *A_loc = aloc[0];
5004   ierr = PetscFree(aloc);CHKERRQ(ierr);
5005   if (!row){
5006     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5007   }
5008   if (!col){
5009     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5010   }
5011   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5012   PetscFunctionReturn(0);
5013 }
5014 
5015 #undef __FUNCT__
5016 #define __FUNCT__ "MatGetBrowsOfAcols"
5017 /*@C
5018     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5019 
5020     Collective on Mat
5021 
5022    Input Parameters:
5023 +    A,B - the matrices in mpiaij format
5024 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5025 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5026 
5027    Output Parameters:
5028 +    rowb, colb - index sets of rows and columns of B to extract
5029 -    B_seq - the sequential matrix generated
5030 
5031     Level: developer
5032 
5033 @*/
5034 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5035 {
5036   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5037   PetscErrorCode    ierr;
5038   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5039   IS                isrowb,iscolb;
5040   Mat               *bseq=PETSC_NULL;
5041 
5042   PetscFunctionBegin;
5043   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5044     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5045   }
5046   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5047 
5048   if (scall == MAT_INITIAL_MATRIX){
5049     start = A->cmap->rstart;
5050     cmap  = a->garray;
5051     nzA   = a->A->cmap->n;
5052     nzB   = a->B->cmap->n;
5053     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5054     ncols = 0;
5055     for (i=0; i<nzB; i++) {  /* row < local row index */
5056       if (cmap[i] < start) idx[ncols++] = cmap[i];
5057       else break;
5058     }
5059     imark = i;
5060     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5061     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5062     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5063     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5064   } else {
5065     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5066     isrowb = *rowb; iscolb = *colb;
5067     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5068     bseq[0] = *B_seq;
5069   }
5070   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5071   *B_seq = bseq[0];
5072   ierr = PetscFree(bseq);CHKERRQ(ierr);
5073   if (!rowb){
5074     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5075   } else {
5076     *rowb = isrowb;
5077   }
5078   if (!colb){
5079     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5080   } else {
5081     *colb = iscolb;
5082   }
5083   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5084   PetscFunctionReturn(0);
5085 }
5086 
5087 #undef __FUNCT__
5088 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5089 /*
5090     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5091     of the OFF-DIAGONAL portion of local A
5092 
5093     Collective on Mat
5094 
5095    Input Parameters:
5096 +    A,B - the matrices in mpiaij format
5097 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5098 
5099    Output Parameters:
5100 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5101 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5102 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5103 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5104 
5105     Level: developer
5106 
5107 */
5108 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5109 {
5110   VecScatter_MPI_General *gen_to,*gen_from;
5111   PetscErrorCode         ierr;
5112   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5113   Mat_SeqAIJ             *b_oth;
5114   VecScatter             ctx=a->Mvctx;
5115   MPI_Comm               comm=((PetscObject)ctx)->comm;
5116   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5117   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5118   PetscScalar            *rvalues,*svalues;
5119   MatScalar              *b_otha,*bufa,*bufA;
5120   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5121   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5122   MPI_Status             *sstatus,rstatus;
5123   PetscMPIInt            jj;
5124   PetscInt               *cols,sbs,rbs;
5125   PetscScalar            *vals;
5126 
5127   PetscFunctionBegin;
5128   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5129     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5130   }
5131   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5132   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5133 
5134   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5135   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5136   rvalues  = gen_from->values; /* holds the length of receiving row */
5137   svalues  = gen_to->values;   /* holds the length of sending row */
5138   nrecvs   = gen_from->n;
5139   nsends   = gen_to->n;
5140 
5141   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5142   srow     = gen_to->indices;   /* local row index to be sent */
5143   sstarts  = gen_to->starts;
5144   sprocs   = gen_to->procs;
5145   sstatus  = gen_to->sstatus;
5146   sbs      = gen_to->bs;
5147   rstarts  = gen_from->starts;
5148   rprocs   = gen_from->procs;
5149   rbs      = gen_from->bs;
5150 
5151   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5152   if (scall == MAT_INITIAL_MATRIX){
5153     /* i-array */
5154     /*---------*/
5155     /*  post receives */
5156     for (i=0; i<nrecvs; i++){
5157       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5158       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5159       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5160     }
5161 
5162     /* pack the outgoing message */
5163     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5164     sstartsj[0] = 0;  rstartsj[0] = 0;
5165     len = 0; /* total length of j or a array to be sent */
5166     k = 0;
5167     for (i=0; i<nsends; i++){
5168       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5169       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5170       for (j=0; j<nrows; j++) {
5171         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5172         for (l=0; l<sbs; l++){
5173           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5174           rowlen[j*sbs+l] = ncols;
5175           len += ncols;
5176           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5177         }
5178         k++;
5179       }
5180       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5181       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5182     }
5183     /* recvs and sends of i-array are completed */
5184     i = nrecvs;
5185     while (i--) {
5186       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5187     }
5188     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5189 
5190     /* allocate buffers for sending j and a arrays */
5191     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5192     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5193 
5194     /* create i-array of B_oth */
5195     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5196     b_othi[0] = 0;
5197     len = 0; /* total length of j or a array to be received */
5198     k = 0;
5199     for (i=0; i<nrecvs; i++){
5200       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5201       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5202       for (j=0; j<nrows; j++) {
5203         b_othi[k+1] = b_othi[k] + rowlen[j];
5204         len += rowlen[j]; k++;
5205       }
5206       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5207     }
5208 
5209     /* allocate space for the j and a arrays of B_oth */
5210     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5211     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5212 
5213     /* j-array */
5214     /*---------*/
5215     /*  post receives of j-array */
5216     for (i=0; i<nrecvs; i++){
5217       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5218       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5219     }
5220 
5221     /* pack the outgoing message j-array */
5222     k = 0;
5223     for (i=0; i<nsends; i++){
5224       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5225       bufJ = bufj+sstartsj[i];
5226       for (j=0; j<nrows; j++) {
5227         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5228         for (ll=0; ll<sbs; ll++){
5229           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5230           for (l=0; l<ncols; l++){
5231             *bufJ++ = cols[l];
5232           }
5233           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5234         }
5235       }
5236       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5237     }
5238 
5239     /* recvs and sends of j-array are completed */
5240     i = nrecvs;
5241     while (i--) {
5242       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5243     }
5244     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5245   } else if (scall == MAT_REUSE_MATRIX){
5246     sstartsj = *startsj_s;
5247     rstartsj = *startsj_r;
5248     bufa     = *bufa_ptr;
5249     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5250     b_otha   = b_oth->a;
5251   } else {
5252     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5253   }
5254 
5255   /* a-array */
5256   /*---------*/
5257   /*  post receives of a-array */
5258   for (i=0; i<nrecvs; i++){
5259     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5260     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5261   }
5262 
5263   /* pack the outgoing message a-array */
5264   k = 0;
5265   for (i=0; i<nsends; i++){
5266     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5267     bufA = bufa+sstartsj[i];
5268     for (j=0; j<nrows; j++) {
5269       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5270       for (ll=0; ll<sbs; ll++){
5271         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5272         for (l=0; l<ncols; l++){
5273           *bufA++ = vals[l];
5274         }
5275         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5276       }
5277     }
5278     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5279   }
5280   /* recvs and sends of a-array are completed */
5281   i = nrecvs;
5282   while (i--) {
5283     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5284   }
5285   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5286   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5287 
5288   if (scall == MAT_INITIAL_MATRIX){
5289     /* put together the new matrix */
5290     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5291 
5292     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5293     /* Since these are PETSc arrays, change flags to free them as necessary. */
5294     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5295     b_oth->free_a  = PETSC_TRUE;
5296     b_oth->free_ij = PETSC_TRUE;
5297     b_oth->nonew   = 0;
5298 
5299     ierr = PetscFree(bufj);CHKERRQ(ierr);
5300     if (!startsj_s || !bufa_ptr){
5301       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5302       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5303     } else {
5304       *startsj_s = sstartsj;
5305       *startsj_r = rstartsj;
5306       *bufa_ptr  = bufa;
5307     }
5308   }
5309   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5310   PetscFunctionReturn(0);
5311 }
5312 
5313 #undef __FUNCT__
5314 #define __FUNCT__ "MatGetCommunicationStructs"
5315 /*@C
5316   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5317 
5318   Not Collective
5319 
5320   Input Parameter:
5321 . A - The matrix in mpiaij format
5322 
5323   Output Parameter:
5324 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5325 . colmap - A map from global column index to local index into lvec
5326 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5327 
5328   Level: developer
5329 
5330 @*/
5331 #if defined (PETSC_USE_CTABLE)
5332 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5333 #else
5334 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5335 #endif
5336 {
5337   Mat_MPIAIJ *a;
5338 
5339   PetscFunctionBegin;
5340   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5341   PetscValidPointer(lvec, 2);
5342   PetscValidPointer(colmap, 3);
5343   PetscValidPointer(multScatter, 4);
5344   a = (Mat_MPIAIJ *) A->data;
5345   if (lvec) *lvec = a->lvec;
5346   if (colmap) *colmap = a->colmap;
5347   if (multScatter) *multScatter = a->Mvctx;
5348   PetscFunctionReturn(0);
5349 }
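
/*
   A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix): obtain and
   view the scatter that MatMult() uses to gather the off-process entries of x.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterView(Mvctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/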
5350 
5351 EXTERN_C_BEGIN
5352 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5353 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5354 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5355 EXTERN_C_END
5356 
5357 #undef __FUNCT__
5358 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5359 /*
5360     Computes C = A*B by forming (B'*A')', since no kernel for computing the MPIDense*MPIAIJ product directly is available
5361 
5362                n                       p                          p
5363         (              )       (              )         (                  )
5364       m (      A       )  *  n (       B      )   =   m (         C        )
5365         (              )       (              )         (                  )
5366 
5367 */
5368 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5369 {
5370   PetscErrorCode     ierr;
5371   Mat                At,Bt,Ct;
5372 
5373   PetscFunctionBegin;
5374   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5375   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5376   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5377   ierr = MatDestroy(&At);CHKERRQ(ierr);
5378   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5379   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5380   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5381   PetscFunctionReturn(0);
5382 }
5383 
5384 #undef __FUNCT__
5385 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5386 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5387 {
5388   PetscErrorCode ierr;
5389   PetscInt       m=A->rmap->n,n=B->cmap->n;
5390   Mat            Cmat;
5391 
5392   PetscFunctionBegin;
5393   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5394   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5395   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5396   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5397   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5398   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5399   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5400   *C   = Cmat;
5401   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5402   PetscFunctionReturn(0);
5403 }
5404 
5405 /* ----------------------------------------------------------------*/
5406 #undef __FUNCT__
5407 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5408 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5409 {
5410   PetscErrorCode ierr;
5411 
5412   PetscFunctionBegin;
5413   if (scall == MAT_INITIAL_MATRIX){
5414     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5415   }
5416   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5417   PetscFunctionReturn(0);
5418 }
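
/*
   A minimal caller-side sketch (assuming A is MATMPIDENSE and B is MATMPIAIJ with
   compatible sizes); MatMatMult() reaches the routine above through the
   "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ() below:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/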
5419 
5420 EXTERN_C_BEGIN
5421 #if defined(PETSC_HAVE_MUMPS)
5422 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5423 #endif
5424 #if defined(PETSC_HAVE_PASTIX)
5425 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5426 #endif
5427 #if defined(PETSC_HAVE_SUPERLU_DIST)
5428 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5429 #endif
5430 #if defined(PETSC_HAVE_SPOOLES)
5431 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5432 #endif
5433 EXTERN_C_END
5434 
5435 /*MC
5436    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5437 
5438    Options Database Keys:
5439 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5440 
5441   Level: beginner
5442 
5443 .seealso: MatCreateMPIAIJ()
5444 M*/
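
/*
   A minimal sketch of creating a MATMPIAIJ matrix explicitly (M, N, d_nz and o_nz
   are placeholders for the caller's global sizes and preallocation counts):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);    (or -mat_type mpiaij with MatSetFromOptions())
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/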
5445 
5446 EXTERN_C_BEGIN
5447 #undef __FUNCT__
5448 #define __FUNCT__ "MatCreate_MPIAIJ"
5449 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5450 {
5451   Mat_MPIAIJ     *b;
5452   PetscErrorCode ierr;
5453   PetscMPIInt    size;
5454 
5455   PetscFunctionBegin;
5456   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5457 
5458   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5459   B->data         = (void*)b;
5460   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5461   B->rmap->bs     = 1;
5462   B->assembled    = PETSC_FALSE;
5463 
5464   B->insertmode   = NOT_SET_VALUES;
5465   b->size         = size;
5466   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5467 
5468   /* build cache for off-processor entries set during assembly */
5469   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5470   b->donotstash  = PETSC_FALSE;
5471   b->colmap      = 0;
5472   b->garray      = 0;
5473   b->roworiented = PETSC_TRUE;
5474 
5475   /* stuff used for matrix vector multiply */
5476   b->lvec      = PETSC_NULL;
5477   b->Mvctx     = PETSC_NULL;
5478 
5479   /* stuff for MatGetRow() */
5480   b->rowindices   = 0;
5481   b->rowvalues    = 0;
5482   b->getrowactive = PETSC_FALSE;
5483 
5484 #if defined(PETSC_HAVE_SPOOLES)
5485   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5486                                      "MatGetFactor_mpiaij_spooles",
5487                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5488 #endif
5489 #if defined(PETSC_HAVE_MUMPS)
5490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5491                                      "MatGetFactor_aij_mumps",
5492                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5493 #endif
5494 #if defined(PETSC_HAVE_PASTIX)
5495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5496                                      "MatGetFactor_mpiaij_pastix",
5497                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5498 #endif
5499 #if defined(PETSC_HAVE_SUPERLU_DIST)
5500   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5501                                      "MatGetFactor_mpiaij_superlu_dist",
5502                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5503 #endif
5504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5505                                      "MatStoreValues_MPIAIJ",
5506                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5508                                      "MatRetrieveValues_MPIAIJ",
5509                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5511                                      "MatGetDiagonalBlock_MPIAIJ",
5512                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5514                                      "MatIsTranspose_MPIAIJ",
5515                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5517                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5518                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5520                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5521                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5523                                      "MatDiagonalScaleLocal_MPIAIJ",
5524                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5526                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5527                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5528   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5529                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5530                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5532                                      "MatConvert_MPIAIJ_MPISBAIJ",
5533                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5534   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5535                                      "MatMatMult_MPIDense_MPIAIJ",
5536                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5538                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5539                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5540   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5541                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5542                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5544   PetscFunctionReturn(0);
5545 }
5546 EXTERN_C_END
5547 
5548 #undef __FUNCT__
5549 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5550 /*@
5551      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5552          and "off-diagonal" part of the matrix in CSR format.
5553 
5554    Collective on MPI_Comm
5555 
5556    Input Parameters:
5557 +  comm - MPI communicator
5558 .  m - number of local rows (Cannot be PETSC_DECIDE)
5559 .  n - number of local columns; this value should be the same as the local size used in creating the
5560        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5561        calculated if N is given). For square matrices n is almost always m.
5562 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5563 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5564 .   i - row indices for "diagonal" portion of matrix
5565 .   j - column indices for the "diagonal" portion, local to this process (0 to n-1)
5566 .   a - matrix values for the "diagonal" portion
5567 .   oi - row indices for "off-diagonal" portion of matrix
5568 .   oj - global column indices for the "off-diagonal" portion
5569 -   oa - matrix values for the "off-diagonal" portion
5570 
5571    Output Parameter:
5572 .   mat - the matrix
5573 
5574    Level: advanced
5575 
5576    Notes:
5577        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5578        must free the arrays once the matrix has been destroyed and not before.
5579 
5580        The i and j indices are 0 based
5581 
5582        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5583 
5584        This sets local rows and cannot be used to set off-processor values.
5585 
5586        You cannot later use MatSetValues() to change values in this matrix.
5587 
5588 .keywords: matrix, aij, compressed row, sparse, parallel
5589 
5590 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5591           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5592 @*/
5593 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5594                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5595 {
5596   PetscErrorCode ierr;
5597   Mat_MPIAIJ     *maij;
5598 
5599   PetscFunctionBegin;
5600   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5601   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5602   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5603   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5604   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5605   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5606   maij = (Mat_MPIAIJ*) (*mat)->data;
5607   maij->donotstash     = PETSC_TRUE;
5608   (*mat)->preallocated = PETSC_TRUE;
5609 
5610   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5611   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5612   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5613   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5614 
5615   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5616   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5617 
5618   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5619   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5620   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5621   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5622 
5623   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5624   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5625   PetscFunctionReturn(0);
5626 }
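
/*
   A minimal sketch of calling MatCreateMPIAIJWithSplitArrays() on two processes for
   the global 2x2 matrix [1 2; 3 4], one row per process.  The "diagonal" block holds
   the locally owned column (local j index); the "off-diagonal" block holds the other
   column (global oj index):

     PetscMPIInt rank;
     PetscInt    i[2]  = {0,1},j[1] = {0},oi[2] = {0,1},oj[1];
     PetscScalar a[1],oa[1];
     Mat         A;
     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     a[0]  = rank ? 4.0 : 1.0;     (diagonal-block entry)
     oj[0] = rank ? 0   : 1;       (global column of the off-diagonal entry)
     oa[0] = rank ? 3.0 : 2.0;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The arrays must remain valid until A is destroyed, as noted above.
*/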
5627 
5628 /*
5629     Special version for direct calls from Fortran
5630 */
5631 #include <private/fortranimpl.h>
5632 
5633 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5634 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5635 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5636 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5637 #endif
5638 
5639 /* Redefine these macros so they can be used in a function that returns void */
5640 #undef CHKERRQ
5641 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5642 #undef SETERRQ2
5643 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5644 #undef SETERRQ
5645 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5646 
5647 EXTERN_C_BEGIN
5648 #undef __FUNCT__
5649 #define __FUNCT__ "matsetvaluesmpiaij_"
5650 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5651 {
5652   Mat             mat = *mmat;
5653   PetscInt        m = *mm, n = *mn;
5654   InsertMode      addv = *maddv;
5655   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5656   PetscScalar     value;
5657   PetscErrorCode  ierr;
5658 
5659   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5660   if (mat->insertmode == NOT_SET_VALUES) {
5661     mat->insertmode = addv;
5662   }
5663 #if defined(PETSC_USE_DEBUG)
5664   else if (mat->insertmode != addv) {
5665     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5666   }
5667 #endif
5668   {
5669   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5670   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5671   PetscBool       roworiented = aij->roworiented;
5672 
5673   /* variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5674   Mat             A = aij->A;
5675   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5676   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5677   MatScalar       *aa = a->a;
5678   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5679   Mat             B = aij->B;
5680   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5681   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5682   MatScalar       *ba = b->a;
5683 
5684   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5685   PetscInt        nonew = a->nonew;
5686   MatScalar       *ap1,*ap2;
5687 
5688   PetscFunctionBegin;
5689   for (i=0; i<m; i++) {
5690     if (im[i] < 0) continue;
5691 #if defined(PETSC_USE_DEBUG)
5692     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5693 #endif
5694     if (im[i] >= rstart && im[i] < rend) {
5695       row      = im[i] - rstart;
5696       lastcol1 = -1;
5697       rp1      = aj + ai[row];
5698       ap1      = aa + ai[row];
5699       rmax1    = aimax[row];
5700       nrow1    = ailen[row];
5701       low1     = 0;
5702       high1    = nrow1;
5703       lastcol2 = -1;
5704       rp2      = bj + bi[row];
5705       ap2      = ba + bi[row];
5706       rmax2    = bimax[row];
5707       nrow2    = bilen[row];
5708       low2     = 0;
5709       high2    = nrow2;
5710 
5711       for (j=0; j<n; j++) {
5712         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5713         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5714         if (in[j] >= cstart && in[j] < cend){
5715           col = in[j] - cstart;
5716           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5717         } else if (in[j] < 0) continue;
5718 #if defined(PETSC_USE_DEBUG)
5719         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5720 #endif
5721         else {
5722           if (mat->was_assembled) {
5723             if (!aij->colmap) {
5724               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5725             }
5726 #if defined (PETSC_USE_CTABLE)
5727             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5728             col--;
5729 #else
5730             col = aij->colmap[in[j]] - 1;
5731 #endif
5732             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5733               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5734               col =  in[j];
5735               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5736               B = aij->B;
5737               b = (Mat_SeqAIJ*)B->data;
5738               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5739               ba       = b->a; /* B's arrays were reallocated by DisAssemble_MPIAIJ(), so ba must be refreshed before computing ap2 */
5740               rp2      = bj + bi[row];
5741               ap2      = ba + bi[row];
5742               rmax2    = bimax[row];
5743               nrow2    = bilen[row];
5744               low2     = 0;
5745               high2    = nrow2;
5746               bm       = aij->B->rmap->n;
5747             }
5748           } else col = in[j];
5749           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5750         }
5751       }
5752     } else {
5753       if (!aij->donotstash) {
5754         if (roworiented) {
5755           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5756         } else {
5757           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5758         }
5759       }
5760     }
5761   }}
5762   PetscFunctionReturnVoid();
5763 }
5764 EXTERN_C_END
5765 
5766