xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ff1a2e19e92ca3eb486258317b2f290407e2b2d1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
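/*
   Example (a minimal sketch; ierr, comm, M, N, nz, dnz and onz are placeholders and error
   handling beyond CHKERRQ() is omitted): create an AIJ matrix and call both preallocation
   routines, as recommended above, so the same code works with one or many processes.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,dnz,PETSC_NULL,onz,PETSC_NULL);CHKERRQ(ierr);
*/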
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
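/*
   Example (a sketch; ierr, comm, A, M and N are placeholders): selecting the type from the
   options database at run time, then running the program with -mat_type aijcrl.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/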
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
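  /* accumulate the local contribution of each global column in work[]: columns of the diagonal
     block A are shifted by cmap->rstart to global numbering, while columns of the off-diagonal
     block B are mapped to global numbering through garray[] */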
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
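/*
   Typical call (a sketch; gmat is assumed to be a MATSEQAIJ whose entries are read on process 0,
   and m is the number of rows this process is to own):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
*/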
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal nonzero counts for each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal nonzero counts for each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each process stores an
330 order-N integer array) but access is fast.
331 */
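/*
   For example (a purely illustrative case), if garray = {3,7,12} then, without PETSC_USE_CTABLE,
   colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 and every other entry is 0, so looking up a global
   column that is not present in the off-diagonal part yields -1 after the usual subtraction of 1.
*/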
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
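/*
   The following two macros insert (or add to) a single value in one row of the diagonal
   block A, respectively the off-diagonal block B, of the local matrix.  They do a short
   binary search followed by a linear scan within the row, honor the ignorezeroentries and
   nonew options, and fall back to MatSeqXAIJReallocateAIJ() when the row must grow.
*/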
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
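  /* rows owned by this process are inserted directly into the diagonal block (columns in
     [cstart,cend)) or the off-diagonal block (all other columns, renumbered through colmap once
     the matrix has been assembled); rows owned elsewhere are stashed for communication during
     assembly unless donotstash is set */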
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           nonew = a->nonew;
506           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
507         } else if (in[j] < 0) continue;
508 #if defined(PETSC_USE_DEBUG)
509         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
510 #endif
511         else {
512           if (mat->was_assembled) {
513             if (!aij->colmap) {
514               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
515             }
516 #if defined (PETSC_USE_CTABLE)
517             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
518 	    col--;
519 #else
520             col = aij->colmap[in[j]] - 1;
521 #endif
522             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
523               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524               col =  in[j];
525               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
526               B = aij->B;
527               b = (Mat_SeqAIJ*)B->data;
528               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
529               rp2      = bj + bi[row];
530               ap2      = ba + bi[row];
531               rmax2    = bimax[row];
532               nrow2    = bilen[row];
533               low2     = 0;
534               high2    = nrow2;
535               bm       = aij->B->rmap->n;
536               ba = b->a;
537             }
538           } else col = in[j];
539           nonew = b->nonew;
540           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
541         }
542       }
543     } else {
544       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
545       if (!aij->donotstash) {
546         mat->assembled = PETSC_FALSE;
547         if (roworiented) {
548           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatGetValues_MPIAIJ"
560 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
561 {
562   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
563   PetscErrorCode ierr;
564   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
565   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
566 
567   PetscFunctionBegin;
568   for (i=0; i<m; i++) {
569     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
570     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
571     if (idxm[i] >= rstart && idxm[i] < rend) {
572       row = idxm[i] - rstart;
573       for (j=0; j<n; j++) {
574         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
575         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
576         if (idxn[j] >= cstart && idxn[j] < cend){
577           col = idxn[j] - cstart;
578           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
579         } else {
580           if (!aij->colmap) {
581             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
582           }
583 #if defined (PETSC_USE_CTABLE)
584           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
585           col --;
586 #else
587           col = aij->colmap[idxn[j]] - 1;
588 #endif
589           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
590           else {
591             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
592           }
593         }
594       }
595     } else {
596       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
597     }
598   }
599   PetscFunctionReturn(0);
600 }
601 
602 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
606 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
607 {
608   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
609   PetscErrorCode ierr;
610   PetscInt       nstash,reallocs;
611   InsertMode     addv;
612 
613   PetscFunctionBegin;
614   if (aij->donotstash || mat->nooffprocentries) {
615     PetscFunctionReturn(0);
616   }
617 
618   /* make sure all processors are either in INSERTMODE or ADDMODE */
619   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
620   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
621   mat->insertmode = addv; /* in case this processor had no cache */
622 
623   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
624   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
625   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
631 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
632 {
633   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
634   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
635   PetscErrorCode ierr;
636   PetscMPIInt    n;
637   PetscInt       i,j,rstart,ncols,flg;
638   PetscInt       *row,*col;
639   PetscBool      other_disassembled;
640   PetscScalar    *val;
641   InsertMode     addv = mat->insertmode;
642 
643   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
644   PetscFunctionBegin;
645   if (!aij->donotstash && !mat->nooffprocentries) {
646     while (1) {
647       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
648       if (!flg) break;
649 
650       for (i=0; i<n;) {
651         /* Now identify the consecutive vals belonging to the same row */
652         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
653         if (j < n) ncols = j-i;
654         else       ncols = n-i;
655         /* Now assemble all these values with a single function call */
656         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
657         i = j;
658       }
659     }
660     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
661   }
662   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
663   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
664 
665   /* determine if any processor has disassembled, if so we must
666      also disassemble ourselves, in order that we may reassemble. */
667   /*
668      if nonzero structure of submatrix B cannot change then we know that
669      no processor disassembled thus we can skip this stuff
670   */
671   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
672     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
673     if (mat->was_assembled && !other_disassembled) {
674       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
675     }
676   }
677   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
678     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
679   }
680   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
681   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
682   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
683   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
684 
685   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
686   aij->rowvalues = 0;
687 
688   /* used by MatAXPY() */
689   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
690   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
691 
692   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
693   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
699 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
700 {
701   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
702   PetscErrorCode ierr;
703 
704   PetscFunctionBegin;
705   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
706   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatZeroRows_MPIAIJ"
712 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
713 {
714   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode    ierr;
716   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
717   PetscInt          i,*owners = A->rmap->range;
718   PetscInt          *nprocs,j,idx,nsends,row;
719   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
720   PetscInt          *rvalues,count,base,slen,*source;
721   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
722   MPI_Comm          comm = ((PetscObject)A)->comm;
723   MPI_Request       *send_waits,*recv_waits;
724   MPI_Status        recv_status,*send_status;
725   const PetscScalar *xx;
726   PetscScalar       *bb;
727 #if defined(PETSC_DEBUG)
728   PetscBool      found = PETSC_FALSE;
729 #endif
730 
731   PetscFunctionBegin;
732   /*  first count number of contributors to each processor */
733   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
734   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
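  /* nprocs is used in pairs: nprocs[2*j] counts how many of the given rows belong to process j,
     and nprocs[2*j+1] is a 0/1 flag marking that a message must be sent to process j */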
735   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
736   j = 0;
737   for (i=0; i<N; i++) {
738     if (lastidx > (idx = rows[i])) j = 0;
739     lastidx = idx;
740     for (; j<size; j++) {
741       if (idx >= owners[j] && idx < owners[j+1]) {
742         nprocs[2*j]++;
743         nprocs[2*j+1] = 1;
744         owner[i] = j;
745 #if defined(PETSC_DEBUG)
746         found = PETSC_TRUE;
747 #endif
748         break;
749       }
750     }
751 #if defined(PETSC_DEBUG)
752     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
753     found = PETSC_FALSE;
754 #endif
755   }
756   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
757 
758   if (A->nooffproczerorows) {
759     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
760     nrecvs = nsends;
761     nmax   = N;
762   } else {
763     /* inform other processors of number of messages and max length*/
764     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
765   }
766 
767   /* post receives:   */
768   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
769   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
770   for (i=0; i<nrecvs; i++) {
771     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
772   }
773 
774   /* do sends:
775       1) starts[i] gives the starting index in svalues for stuff going to
776          the ith processor
777   */
778   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
779   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
780   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
781   starts[0] = 0;
782   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
783   for (i=0; i<N; i++) {
784     svalues[starts[owner[i]]++] = rows[i];
785   }
786 
787   starts[0] = 0;
788   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
789   count = 0;
790   for (i=0; i<size; i++) {
791     if (nprocs[2*i+1]) {
792       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
793     }
794   }
795   ierr = PetscFree(starts);CHKERRQ(ierr);
796 
797   base = owners[rank];
798 
799   /*  wait on receives */
800   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
801   count  = nrecvs; slen = 0;
802   while (count) {
803     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
804     /* unpack receives into our local space */
805     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
806     source[imdex]  = recv_status.MPI_SOURCE;
807     lens[imdex]    = n;
808     slen          += n;
809     count--;
810   }
811   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
812 
813   /* move the data into the send scatter */
814   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
815   count = 0;
816   for (i=0; i<nrecvs; i++) {
817     values = rvalues + i*nmax;
818     for (j=0; j<lens[i]; j++) {
819       lrows[count++] = values[j] - base;
820     }
821   }
822   ierr = PetscFree(rvalues);CHKERRQ(ierr);
823   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
824   ierr = PetscFree(owner);CHKERRQ(ierr);
825   ierr = PetscFree(nprocs);CHKERRQ(ierr);
826 
827   /* fix right hand side if needed */
828   if (x && b) {
829     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
830     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
831     for (i=0; i<slen; i++) {
832       bb[lrows[i]] = diag*xx[lrows[i]];
833     }
834     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
835     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
836   }
837   /*
838         Zero the required rows. If the "diagonal block" of the matrix
839      is square and the user wishes to set the diagonal we use separate
840      code so that MatSetValues() is not called for each diagonal allocating
841      new memory, thus calling lots of mallocs and slowing things down.
842 
843   */
844   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
845   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
846   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
847     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
848   } else if (diag != 0.0) {
849     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
850     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
851       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
852 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
853     }
854     for (i = 0; i < slen; i++) {
855       row  = lrows[i] + rstart;
856       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
857     }
858     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
859     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860   } else {
861     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(lrows);CHKERRQ(ierr);
864 
865   /* wait on sends */
866   if (nsends) {
867     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
868     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
869     ierr = PetscFree(send_status);CHKERRQ(ierr);
870   }
871   ierr = PetscFree(send_waits);CHKERRQ(ierr);
872   ierr = PetscFree(svalues);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
878 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
879 {
880   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
881   PetscErrorCode    ierr;
882   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
883   PetscInt          i,*owners = A->rmap->range;
884   PetscInt          *nprocs,j,idx,nsends;
885   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
886   PetscInt          *rvalues,count,base,slen,*source;
887   PetscInt          *lens,*lrows,*values,m;
888   MPI_Comm          comm = ((PetscObject)A)->comm;
889   MPI_Request       *send_waits,*recv_waits;
890   MPI_Status        recv_status,*send_status;
891   const PetscScalar *xx;
892   PetscScalar       *bb,*mask;
893   Vec               xmask,lmask;
894   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
895   const PetscInt    *aj, *ii,*ridx;
896   PetscScalar       *aa;
897 #if defined(PETSC_DEBUG)
898   PetscBool         found = PETSC_FALSE;
899 #endif
900 
901   PetscFunctionBegin;
902   /*  first count number of contributors to each processor */
903   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
904   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
905   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
906   j = 0;
907   for (i=0; i<N; i++) {
908     if (lastidx > (idx = rows[i])) j = 0;
909     lastidx = idx;
910     for (; j<size; j++) {
911       if (idx >= owners[j] && idx < owners[j+1]) {
912         nprocs[2*j]++;
913         nprocs[2*j+1] = 1;
914         owner[i] = j;
915 #if defined(PETSC_DEBUG)
916         found = PETSC_TRUE;
917 #endif
918         break;
919       }
920     }
921 #if defined(PETSC_DEBUG)
922     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
923     found = PETSC_FALSE;
924 #endif
925   }
926   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
927 
928   /* inform other processors of number of messages and max length*/
929   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
930 
931   /* post receives:   */
932   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
933   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
934   for (i=0; i<nrecvs; i++) {
935     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
936   }
937 
938   /* do sends:
939       1) starts[i] gives the starting index in svalues for stuff going to
940          the ith processor
941   */
942   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
943   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
944   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
945   starts[0] = 0;
946   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
947   for (i=0; i<N; i++) {
948     svalues[starts[owner[i]]++] = rows[i];
949   }
950 
951   starts[0] = 0;
952   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
953   count = 0;
954   for (i=0; i<size; i++) {
955     if (nprocs[2*i+1]) {
956       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
957     }
958   }
959   ierr = PetscFree(starts);CHKERRQ(ierr);
960 
961   base = owners[rank];
962 
963   /*  wait on receives */
964   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
965   count  = nrecvs; slen = 0;
966   while (count) {
967     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
968     /* unpack receives into our local space */
969     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
970     source[imdex]  = recv_status.MPI_SOURCE;
971     lens[imdex]    = n;
972     slen          += n;
973     count--;
974   }
975   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
976 
977   /* move the data into the send scatter */
978   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
979   count = 0;
980   for (i=0; i<nrecvs; i++) {
981     values = rvalues + i*nmax;
982     for (j=0; j<lens[i]; j++) {
983       lrows[count++] = values[j] - base;
984     }
985   }
986   ierr = PetscFree(rvalues);CHKERRQ(ierr);
987   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
988   ierr = PetscFree(owner);CHKERRQ(ierr);
989   ierr = PetscFree(nprocs);CHKERRQ(ierr);
990   /* lrows are the local rows to be zeroed, slen is the number of local rows */
991 
992   /* zero diagonal part of matrix */
993   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
994 
995   /* handle off diagonal part of matrix */
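  /* build a mask: put a 1 in every global row/column being eliminated and scatter it into the
     ghosted layout (lmask), so that mask[] below tells, for each off-diagonal column of B,
     whether that column is being removed */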
996   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
997   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
998   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
999   for (i=0; i<slen; i++) {
1000     bb[lrows[i]] = 1;
1001   }
1002   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1006   if (x) {
1007     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1010     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1011   }
1012   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1013 
1014   /* remove zeroed rows of off diagonal matrix */
1015   ii = aij->i;
1016   for (i=0; i<slen; i++) {
1017     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1018   }
1019 
1020   /* loop over all elements of off process part of matrix zeroing removed columns*/
1021   if (aij->compressedrow.use){
1022     m    = aij->compressedrow.nrows;
1023     ii   = aij->compressedrow.i;
1024     ridx = aij->compressedrow.rindex;
1025     for (i=0; i<m; i++){
1026       n   = ii[i+1] - ii[i];
1027       aj  = aij->j + ii[i];
1028       aa  = aij->a + ii[i];
1029 
1030       for (j=0; j<n; j++) {
1031         if (PetscAbsScalar(mask[*aj])) {
1032           if (b) bb[*ridx] -= *aa*xx[*aj];
1033           *aa        = 0.0;
1034         }
1035         aa++;
1036         aj++;
1037       }
1038       ridx++;
1039     }
1040   } else { /* do not use compressed row format */
1041     m = l->B->rmap->n;
1042     for (i=0; i<m; i++) {
1043       n   = ii[i+1] - ii[i];
1044       aj  = aij->j + ii[i];
1045       aa  = aij->a + ii[i];
1046       for (j=0; j<n; j++) {
1047         if (PetscAbsScalar(mask[*aj])) {
1048           if (b) bb[i] -= *aa*xx[*aj];
1049           *aa    = 0.0;
1050         }
1051         aa++;
1052         aj++;
1053       }
1054     }
1055   }
1056   if (x) {
1057     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1058     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1059   }
1060   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1061   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1062   ierr = PetscFree(lrows);CHKERRQ(ierr);
1063 
1064   /* wait on sends */
1065   if (nsends) {
1066     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1067     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1068     ierr = PetscFree(send_status);CHKERRQ(ierr);
1069   }
1070   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1071   ierr = PetscFree(svalues);CHKERRQ(ierr);
1072 
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatMult_MPIAIJ"
1078 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1079 {
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081   PetscErrorCode ierr;
1082   PetscInt       nt;
1083 
1084   PetscFunctionBegin;
1085   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1086   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1087   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1089   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1096 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1108 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1123 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1124 {
1125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1126   PetscErrorCode ierr;
1127   PetscBool      merged;
1128 
1129   PetscFunctionBegin;
1130   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1131   /* do nondiagonal part */
1132   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1133   if (!merged) {
1134     /* send it on its way */
1135     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136     /* do local part */
1137     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1138     /* receive remote parts: note this assumes the values are not actually */
1139     /* added into yy until the VecScatterEnd() on the next line */
1140     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   } else {
1142     /* do local part */
1143     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1144     /* send it on its way */
1145     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1146     /* values actually were received in the Begin() but we need to call this nop */
1147     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148   }
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 EXTERN_C_BEGIN
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1155 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1156 {
1157   MPI_Comm       comm;
1158   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1159   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1160   IS             Me,Notme;
1161   PetscErrorCode ierr;
1162   PetscInt       M,N,first,last,*notme,i;
1163   PetscMPIInt    size;
1164 
1165   PetscFunctionBegin;
1166 
1167   /* Easy test: symmetric diagonal block */
1168   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1169   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1170   if (!*f) PetscFunctionReturn(0);
1171   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1172   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1173   if (size == 1) PetscFunctionReturn(0);
1174 
1175   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1176   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1177   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1178   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1179   for (i=0; i<first; i++) notme[i] = i;
1180   for (i=last; i<M; i++) notme[i-last+first] = i;
1181   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1182   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1183   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1184   Aoff = Aoffs[0];
1185   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1186   Boff = Boffs[0];
1187   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1188   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1189   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1190   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1191   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1192   ierr = PetscFree(notme);CHKERRQ(ierr);
1193    PetscFunctionReturn(0);
1194 }
1195 EXTERN_C_END
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1199 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   /* do nondiagonal part */
1206   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1207   /* send it on its way */
1208   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1209   /* do local part */
1210   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1211   /* receive remote parts */
1212   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*
1217   This only works correctly for square matrices where the subblock A->A is the
1218    diagonal block
1219 */
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1222 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1223 {
1224   PetscErrorCode ierr;
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226 
1227   PetscFunctionBegin;
1228   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1229   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1230   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatScale_MPIAIJ"
1236 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1237 {
1238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1243   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatDestroy_MPIAIJ"
1249 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1250 {
1251   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255 #if defined(PETSC_USE_LOG)
1256   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1257 #endif
1258   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1259   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1260   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1262 #if defined (PETSC_USE_CTABLE)
1263   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1264 #else
1265   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1266 #endif
1267   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1268   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1269   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1270   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1271   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1272   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1273 
1274   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1288 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1289 {
1290   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1291   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1292   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1293   PetscErrorCode    ierr;
1294   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1295   int               fd;
1296   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1297   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1298   PetscScalar       *column_values;
1299   PetscInt          message_count,flowcontrolcount;
1300 
1301   PetscFunctionBegin;
1302   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1303   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1304   nz   = A->nz + B->nz;
1305   if (!rank) {
1306     header[0] = MAT_FILE_CLASSID;
1307     header[1] = mat->rmap->N;
1308     header[2] = mat->cmap->N;
1309     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1310     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1311     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1312     /* get largest number of rows any processor has */
1313     rlen = mat->rmap->n;
1314     range = mat->rmap->range;
1315     for (i=1; i<size; i++) {
1316       rlen = PetscMax(rlen,range[i+1] - range[i]);
1317     }
1318   } else {
1319     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1320     rlen = mat->rmap->n;
1321   }
1322 
1323   /* load up the local row counts */
1324   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1325   for (i=0; i<mat->rmap->n; i++) {
1326     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1327   }
1328 
1329   /* store the row lengths to the file */
1330   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1331   if (!rank) {
1332     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1333     for (i=1; i<size; i++) {
1334       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1335       rlen = range[i+1] - range[i];
1336       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1337       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1338     }
1339     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1340   } else {
1341     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1342     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1343     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1344   }
1345   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1346 
1347   /* load up the local column indices */
1348   nzmax = nz; /* 0th processor needs space the largest processor needs */
1349   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1350   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1351   cnt  = 0;
1352   for (i=0; i<mat->rmap->n; i++) {
1353     for (j=B->i[i]; j<B->i[i+1]; j++) {
1354       if ( (col = garray[B->j[j]]) > cstart) break;
1355       column_indices[cnt++] = col;
1356     }
1357     for (k=A->i[i]; k<A->i[i+1]; k++) {
1358       column_indices[cnt++] = A->j[k] + cstart;
1359     }
1360     for (; j<B->i[i+1]; j++) {
1361       column_indices[cnt++] = garray[B->j[j]];
1362     }
1363   }
1364   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1365 
1366   /* store the column indices to the file */
1367   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1368   if (!rank) {
1369     MPI_Status status;
1370     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1371     for (i=1; i<size; i++) {
1372       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1373       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1374       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1375       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1376       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1377     }
1378     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1379   } else {
1380     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1381     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1382     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1384   }
1385   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1386 
1387   /* load up the local column values */
1388   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1389   cnt  = 0;
1390   for (i=0; i<mat->rmap->n; i++) {
1391     for (j=B->i[i]; j<B->i[i+1]; j++) {
1392       if ( garray[B->j[j]] > cstart) break;
1393       column_values[cnt++] = B->a[j];
1394     }
1395     for (k=A->i[i]; k<A->i[i+1]; k++) {
1396       column_values[cnt++] = A->a[k];
1397     }
1398     for (; j<B->i[i+1]; j++) {
1399       column_values[cnt++] = B->a[j];
1400     }
1401   }
1402   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1403 
1404   /* store the column values to the file */
1405   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1406   if (!rank) {
1407     MPI_Status status;
1408     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1409     for (i=1; i<size; i++) {
1410       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1411       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1412       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1413       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1414       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1415     }
1416     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1417   } else {
1418     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1419     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1420     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1421     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1422   }
1423   ierr = PetscFree(column_values);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
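/*
   MatView_MPIAIJ_ASCIIorDraworSocket - handles ASCII, draw, and socket viewers (binary
   viewers are dispatched to MatView_MPIAIJ_Binary).  For more than one process the entire
   matrix is first assembled onto process 0, so large matrices should be viewed with the
   binary viewer instead; ASCII output is refused beyond 1024 rows unless
   -mat_ascii_output_large is given.

   A usage sketch (assuming an assembled MATMPIAIJ matrix A):

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/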
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1429 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1430 {
1431   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1432   PetscErrorCode    ierr;
1433   PetscMPIInt       rank = aij->rank,size = aij->size;
1434   PetscBool         isdraw,iascii,isbinary;
1435   PetscViewer       sviewer;
1436   PetscViewerFormat format;
1437 
1438   PetscFunctionBegin;
1439   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1440   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1441   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1442   if (iascii) {
1443     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1444     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1445       MatInfo    info;
1446       PetscBool  inodes;
1447 
1448       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1449       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1450       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1451       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1452       if (!inodes) {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1454 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       } else {
1456         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1457 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1458       }
1459       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1461       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1463       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1466       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1467       PetscFunctionReturn(0);
1468     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1469       PetscInt   inodecount,inodelimit,*inodes;
1470       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1471       if (inodes) {
1472         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1473       } else {
1474         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1475       }
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1478       PetscFunctionReturn(0);
1479     }
1480   } else if (isbinary) {
1481     if (size == 1) {
1482       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1483       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1484     } else {
1485       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1486     }
1487     PetscFunctionReturn(0);
1488   } else if (isdraw) {
1489     PetscDraw  draw;
1490     PetscBool  isnull;
1491     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1492     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1493   }
1494 
1495   if (size == 1) {
1496     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1497     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1498   } else {
1499     /* assemble the entire matrix onto first processor. */
1500     Mat         A;
1501     Mat_SeqAIJ  *Aloc;
1502     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1503     MatScalar   *a;
1504 
1505     if (mat->rmap->N > 1024) {
1506       PetscBool  flg = PETSC_FALSE;
1507 
1508       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1509       if (!flg) {
1510         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1511       }
1512     }
1513 
1514     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1515     if (!rank) {
1516       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1517     } else {
1518       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1519     }
1520     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1521     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1522     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1523     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1524     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1525 
1526     /* copy over the A part */
1527     Aloc = (Mat_SeqAIJ*)aij->A->data;
1528     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1529     row = mat->rmap->rstart;
1530     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1531     for (i=0; i<m; i++) {
1532       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1533       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1534     }
1535     aj = Aloc->j;
1536     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1537 
1538     /* copy over the B part */
1539     Aloc = (Mat_SeqAIJ*)aij->B->data;
1540     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1541     row  = mat->rmap->rstart;
1542     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1543     ct   = cols;
1544     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1545     for (i=0; i<m; i++) {
1546       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1547       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1548     }
1549     ierr = PetscFree(ct);CHKERRQ(ierr);
1550     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1551     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     /*
1553        Everyone has to participate in this call to draw the matrix since the graphics waits are
1554        synchronized across all processors that share the PetscDraw object
1555     */
1556     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1557     if (!rank) {
1558       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1559       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1560       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1561       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1562     }
1563     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1564     ierr = MatDestroy(&A);CHKERRQ(ierr);
1565   }
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatView_MPIAIJ"
1571 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1572 {
1573   PetscErrorCode ierr;
1574   PetscBool      iascii,isdraw,issocket,isbinary;
1575 
1576   PetscFunctionBegin;
1577   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1578   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1579   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1580   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1581   if (iascii || isdraw || isbinary || issocket) {
1582     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1583   } else {
1584     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
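/*
   MatSOR_MPIAIJ - only the SOR_LOCAL_* (processor-local) sweeps and Eisenstat's trick are
   supported in parallel; each iteration scatters the current solution, forms
   bb1 = bb - B*x_off with the off-diagonal block, and then runs the sequential SOR kernel
   on the diagonal block.

   This routine is normally reached through PCSOR rather than called directly; a sketch of
   the usual setup (the KSP object ksp is an assumption):

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);   /* or -pc_type sor on the command line */
*/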
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSOR_MPIAIJ"
1591 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1592 {
1593   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1594   PetscErrorCode ierr;
1595   Vec            bb1 = 0;
1596   PetscBool      hasop;
1597 
1598   PetscFunctionBegin;
1599   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1600     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1601   }
1602 
1603   if (flag == SOR_APPLY_UPPER) {
1604     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1605     PetscFunctionReturn(0);
1606   }
1607 
1608   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1609     if (flag & SOR_ZERO_INITIAL_GUESS) {
1610       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1611       its--;
1612     }
1613 
1614     while (its--) {
1615       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1616       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617 
1618       /* update rhs: bb1 = bb - B*x */
1619       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1620       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1621 
1622       /* local sweep */
1623       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1624     }
1625   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1626     if (flag & SOR_ZERO_INITIAL_GUESS) {
1627       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1628       its--;
1629     }
1630     while (its--) {
1631       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634       /* update rhs: bb1 = bb - B*x */
1635       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1636       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1637 
1638       /* local sweep */
1639       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1640     }
1641   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1642     if (flag & SOR_ZERO_INITIAL_GUESS) {
1643       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1644       its--;
1645     }
1646     while (its--) {
1647       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1649 
1650       /* update rhs: bb1 = bb - B*x */
1651       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1652       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1653 
1654       /* local sweep */
1655       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1656     }
1657   }  else if (flag & SOR_EISENSTAT) {
1658     Vec         xx1;
1659 
1660     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1661     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1662 
1663     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665     if (!mat->diag) {
1666       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1667       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1668     }
1669     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1670     if (hasop) {
1671       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1672     } else {
1673       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1674     }
1675     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1676 
1677     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1678 
1679     /* local sweep */
1680     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1681     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1682     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1683   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1684 
1685   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
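/*
   MatPermute_MPIAIJ - gathers the row permutation onto every process, inverts it, and
   extracts the permuted matrix as a submatrix.

   A usage sketch (rowp and colp must be permutations of the row and column index ranges;
   the index sets below are assumptions):

     IS  rowp,colp;
     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/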
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatPermute_MPIAIJ"
1691 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1692 {
1693   MPI_Comm       comm;
1694   PetscInt       first,local_rowsize,local_colsize;
1695   const PetscInt *rows;
1696   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1697   PetscErrorCode ierr;
1698 
1699   PetscFunctionBegin;
1700   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1701   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1702   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1703   /* collect the global row permutation and invert it */
1704   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1705   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1706   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1707   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1708   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1709   /* get the local target indices */
1710   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1711   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1712   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1713   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1714   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1716   /* the column permutation is so much easier;
1717      make a local version of 'colp' and invert it */
1718   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1719   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1720   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1721   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1722   /* now we just get the submatrix */
1723   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1724   /* clean up */
1725   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1726   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 
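/*
   MatGetInfo_MPIAIJ - sums the statistics of the diagonal (A) and off-diagonal (B) blocks
   and, for the global flags, reduces them over the communicator.

   A usage sketch (assuming an assembled matrix A):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
*/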
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1732 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1733 {
1734   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1735   Mat            A = mat->A,B = mat->B;
1736   PetscErrorCode ierr;
1737   PetscReal      isend[5],irecv[5];
1738 
1739   PetscFunctionBegin;
1740   info->block_size     = 1.0;
1741   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1742   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1743   isend[3] = info->memory;  isend[4] = info->mallocs;
1744   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1745   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1746   isend[3] += info->memory;  isend[4] += info->mallocs;
1747   if (flag == MAT_LOCAL) {
1748     info->nz_used      = isend[0];
1749     info->nz_allocated = isend[1];
1750     info->nz_unneeded  = isend[2];
1751     info->memory       = isend[3];
1752     info->mallocs      = isend[4];
1753   } else if (flag == MAT_GLOBAL_MAX) {
1754     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1755     info->nz_used      = irecv[0];
1756     info->nz_allocated = irecv[1];
1757     info->nz_unneeded  = irecv[2];
1758     info->memory       = irecv[3];
1759     info->mallocs      = irecv[4];
1760   } else if (flag == MAT_GLOBAL_SUM) {
1761     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1762     info->nz_used      = irecv[0];
1763     info->nz_allocated = irecv[1];
1764     info->nz_unneeded  = irecv[2];
1765     info->memory       = irecv[3];
1766     info->mallocs      = irecv[4];
1767   }
1768   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1769   info->fill_ratio_needed = 0;
1770   info->factor_mallocs    = 0;
1771 
1772   PetscFunctionReturn(0);
1773 }
1774 
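/*
   MatSetOption_MPIAIJ - most options are simply forwarded to the two sequential blocks;
   MAT_ROW_ORIENTED and MAT_IGNORE_OFF_PROC_ENTRIES also update the parallel bookkeeping.

   A usage sketch (assuming values will only be set in locally owned rows):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/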
1775 #undef __FUNCT__
1776 #define __FUNCT__ "MatSetOption_MPIAIJ"
1777 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1778 {
1779   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1780   PetscErrorCode ierr;
1781 
1782   PetscFunctionBegin;
1783   switch (op) {
1784   case MAT_NEW_NONZERO_LOCATIONS:
1785   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1786   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1787   case MAT_KEEP_NONZERO_PATTERN:
1788   case MAT_NEW_NONZERO_LOCATION_ERR:
1789   case MAT_USE_INODES:
1790   case MAT_IGNORE_ZERO_ENTRIES:
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1793     break;
1794   case MAT_ROW_ORIENTED:
1795     a->roworiented = flg;
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1798     break;
1799   case MAT_NEW_DIAGONALS:
1800     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1801     break;
1802   case MAT_IGNORE_OFF_PROC_ENTRIES:
1803     a->donotstash = flg;
1804     break;
1805   case MAT_SPD:
1806     A->spd_set                         = PETSC_TRUE;
1807     A->spd                             = flg;
1808     if (flg) {
1809       A->symmetric                     = PETSC_TRUE;
1810       A->structurally_symmetric        = PETSC_TRUE;
1811       A->symmetric_set                 = PETSC_TRUE;
1812       A->structurally_symmetric_set    = PETSC_TRUE;
1813     }
1814     break;
1815   case MAT_SYMMETRIC:
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     break;
1818   case MAT_STRUCTURALLY_SYMMETRIC:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_HERMITIAN:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   case MAT_SYMMETRY_ETERNAL:
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     break;
1827   default:
1828     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1829   }
1830   PetscFunctionReturn(0);
1831 }
1832 
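/*
   MatGetRow_MPIAIJ - merges the requested local row of the diagonal and off-diagonal
   blocks into a single list with globally increasing column indices; only locally owned
   rows may be requested, and MatRestoreRow() must follow each call.

   A usage sketch looping over the owned rows of a matrix A (cols and vals are borrowed
   pointers, valid only until the matching restore):

     PetscInt          rstart,rend,i,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatGetRow(A,i,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use cols[0..ncols-1] and vals[0..ncols-1] ... */
       ierr = MatRestoreRow(A,i,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/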
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatGetRow_MPIAIJ"
1835 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1836 {
1837   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1838   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1839   PetscErrorCode ierr;
1840   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1841   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1842   PetscInt       *cmap,*idx_p;
1843 
1844   PetscFunctionBegin;
1845   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1846   mat->getrowactive = PETSC_TRUE;
1847 
1848   if (!mat->rowvalues && (idx || v)) {
1849     /*
1850         allocate enough space to hold information from the longest row.
1851     */
1852     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1853     PetscInt   max = 1,tmp;
1854     for (i=0; i<matin->rmap->n; i++) {
1855       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1856       if (max < tmp) { max = tmp; }
1857     }
1858     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1859   }
1860 
1861   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1862   lrow = row - rstart;
1863 
1864   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1865   if (!v)   {pvA = 0; pvB = 0;}
1866   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1867   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1868   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1869   nztot = nzA + nzB;
1870 
1871   cmap  = mat->garray;
1872   if (v  || idx) {
1873     if (nztot) {
1874       /* Sort by increasing column numbers, assuming A and B already sorted */
1875       PetscInt imark = -1;
1876       if (v) {
1877         *v = v_p = mat->rowvalues;
1878         for (i=0; i<nzB; i++) {
1879           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1880           else break;
1881         }
1882         imark = i;
1883         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1884         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1885       }
1886       if (idx) {
1887         *idx = idx_p = mat->rowindices;
1888         if (imark > -1) {
1889           for (i=0; i<imark; i++) {
1890             idx_p[i] = cmap[cworkB[i]];
1891           }
1892         } else {
1893           for (i=0; i<nzB; i++) {
1894             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1895             else break;
1896           }
1897           imark = i;
1898         }
1899         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1900         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1901       }
1902     } else {
1903       if (idx) *idx = 0;
1904       if (v)   *v   = 0;
1905     }
1906   }
1907   *nz = nztot;
1908   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1909   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1916 {
1917   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1918 
1919   PetscFunctionBegin;
1920   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1921   aij->getrowactive = PETSC_FALSE;
1922   PetscFunctionReturn(0);
1923 }
1924 
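/*
   MatNorm_MPIAIJ - NORM_FROBENIUS sums the squares of both blocks and reduces with
   MPIU_SUM, NORM_1 accumulates per-column sums of absolute values, and NORM_INFINITY
   takes the maximum local row sum followed by a global MPIU_MAX; the 2-norm is not
   supported.

   A usage sketch:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/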
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatNorm_MPIAIJ"
1927 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1928 {
1929   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1930   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1931   PetscErrorCode ierr;
1932   PetscInt       i,j,cstart = mat->cmap->rstart;
1933   PetscReal      sum = 0.0;
1934   MatScalar      *v;
1935 
1936   PetscFunctionBegin;
1937   if (aij->size == 1) {
1938     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1939   } else {
1940     if (type == NORM_FROBENIUS) {
1941       v = amat->a;
1942       for (i=0; i<amat->nz; i++) {
1943 #if defined(PETSC_USE_COMPLEX)
1944         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1945 #else
1946         sum += (*v)*(*v); v++;
1947 #endif
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951 #if defined(PETSC_USE_COMPLEX)
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953 #else
1954         sum += (*v)*(*v); v++;
1955 #endif
1956       }
1957       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1958       *norm = PetscSqrtReal(*norm);
1959     } else if (type == NORM_1) { /* max column norm */
1960       PetscReal *tmp,*tmp2;
1961       PetscInt  *jj,*garray = aij->garray;
1962       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1963       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1964       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1965       *norm = 0.0;
1966       v = amat->a; jj = amat->j;
1967       for (j=0; j<amat->nz; j++) {
1968         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1969       }
1970       v = bmat->a; jj = bmat->j;
1971       for (j=0; j<bmat->nz; j++) {
1972         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1973       }
1974       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1975       for (j=0; j<mat->cmap->N; j++) {
1976         if (tmp2[j] > *norm) *norm = tmp2[j];
1977       }
1978       ierr = PetscFree(tmp);CHKERRQ(ierr);
1979       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1980     } else if (type == NORM_INFINITY) { /* max row norm */
1981       PetscReal ntemp = 0.0;
1982       for (j=0; j<aij->A->rmap->n; j++) {
1983         v = amat->a + amat->i[j];
1984         sum = 0.0;
1985         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1986           sum += PetscAbsScalar(*v); v++;
1987         }
1988         v = bmat->a + bmat->i[j];
1989         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1990           sum += PetscAbsScalar(*v); v++;
1991         }
1992         if (sum > ntemp) ntemp = sum;
1993       }
1994       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1995     } else {
1996       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1997     }
1998   }
1999   PetscFunctionReturn(0);
2000 }
2001 
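/*
   MatTranspose_MPIAIJ - preallocates the transpose using the local column counts of the
   diagonal block (the off-diagonal counts are only approximated) and then inserts the
   rows of A as columns of the result with MatSetValues().

   A usage sketch:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/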
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatTranspose_MPIAIJ"
2004 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2005 {
2006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2007   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2008   PetscErrorCode ierr;
2009   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2010   PetscInt       cstart=A->cmap->rstart,ncol;
2011   Mat            B;
2012   MatScalar      *array;
2013 
2014   PetscFunctionBegin;
2015   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2016 
2017   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2018   ai = Aloc->i; aj = Aloc->j;
2019   bi = Bloc->i; bj = Bloc->j;
2020   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2021     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2022     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2023     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2024     for (i=0; i<ai[ma]; i++){
2025       d_nnz[aj[i]] ++;
2026       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2027     }
2028 
2029     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2030     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2031     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2032     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2033     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2034     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2035   } else {
2036     B = *matout;
2037   }
2038 
2039   /* copy over the A part */
2040   array = Aloc->a;
2041   row = A->rmap->rstart;
2042   for (i=0; i<ma; i++) {
2043     ncol = ai[i+1]-ai[i];
2044     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2045     row++; array += ncol; aj += ncol;
2046   }
2047   aj = Aloc->j;
2048   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2049 
2050   /* copy over the B part */
2051   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2052   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2053   array = Bloc->a;
2054   row = A->rmap->rstart;
2055   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2056   cols_tmp = cols;
2057   for (i=0; i<mb; i++) {
2058     ncol = bi[i+1]-bi[i];
2059     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2060     row++; array += ncol; cols_tmp += ncol;
2061   }
2062   ierr = PetscFree(cols);CHKERRQ(ierr);
2063 
2064   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2067     *matout = B;
2068   } else {
2069     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
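/*
   MatDiagonalScale_MPIAIJ - computes diag(ll)*A*diag(rr); the scatter of rr to the ghost
   entries is overlapped with the left scaling of the off-diagonal block and the scaling
   of the diagonal block.

   A usage sketch (ll and rr must match the local row and column layouts of A; passing
   PETSC_NULL skips that side):

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/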
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2076 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2077 {
2078   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2079   Mat            a = aij->A,b = aij->B;
2080   PetscErrorCode ierr;
2081   PetscInt       s1,s2,s3;
2082 
2083   PetscFunctionBegin;
2084   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2085   if (rr) {
2086     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2087     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2088     /* Overlap communication with computation. */
2089     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2090   }
2091   if (ll) {
2092     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2093     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2094     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2095   }
2096   /* scale the diagonal block */
2097   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2098 
2099   if (rr) {
2100     /* Do a scatter end and then right scale the off-diagonal block */
2101     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2102     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2103   }
2104 
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2110 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2111 {
2112   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2113   PetscErrorCode ierr;
2114 
2115   PetscFunctionBegin;
2116   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2117   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2118   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2119   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2124 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2125 {
2126   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
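/*
   MatEqual_MPIAIJ - compares the diagonal and off-diagonal blocks separately and then
   combines the local results with a logical-AND reduction, so the answer is collective.

   A usage sketch:

     PetscBool eq;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
*/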
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatEqual_MPIAIJ"
2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2137 {
2138   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2139   Mat            a,b,c,d;
2140   PetscBool      flg;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   a = matA->A; b = matA->B;
2145   c = matB->A; d = matB->B;
2146 
2147   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2148   if (flg) {
2149     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2150   }
2151   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2152   PetscFunctionReturn(0);
2153 }
2154 
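/*
   MatCopy_MPIAIJ - copies the two blocks directly only when both matrices share the same
   nonzero pattern and copy implementation; otherwise it falls back to MatCopy_Basic()
   because the column-compressed off-diagonal blocks of the two matrices may not match.

   A usage sketch (B must already have the same layout as A):

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/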
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatCopy_MPIAIJ"
2157 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2158 {
2159   PetscErrorCode ierr;
2160   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2161   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2162 
2163   PetscFunctionBegin;
2164   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2165   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2166     /* because of the column compression in the off-processor part of the matrix a->B,
2167        the number of columns in a->B and b->B may be different, hence we cannot call
2168        the MatCopy() directly on the two parts. If need be, we can provide a more
2169        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2170        then copying the submatrices */
2171     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2172   } else {
2173     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2174     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatSetUp_MPIAIJ"
2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2182 {
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
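/*
   MatAXPY_MPIAIJ - Y = Y + a*X.  With SAME_NONZERO_PATTERN both blocks are combined with
   a single BLAS axpy; with SUBSET_NONZERO_PATTERN an index map from X into Y is built and
   cached; otherwise a new matrix with merged preallocation replaces Y.

   A usage sketch (alpha is an assumption):

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/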
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatAXPY_MPIAIJ"
2192 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2193 {
2194   PetscErrorCode ierr;
2195   PetscInt       i;
2196   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2197   PetscBLASInt   bnz,one=1;
2198   Mat_SeqAIJ     *x,*y;
2199 
2200   PetscFunctionBegin;
2201   if (str == SAME_NONZERO_PATTERN) {
2202     PetscScalar alpha = a;
2203     x = (Mat_SeqAIJ *)xx->A->data;
2204     y = (Mat_SeqAIJ *)yy->A->data;
2205     bnz = PetscBLASIntCast(x->nz);
2206     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2207     x = (Mat_SeqAIJ *)xx->B->data;
2208     y = (Mat_SeqAIJ *)yy->B->data;
2209     bnz = PetscBLASIntCast(x->nz);
2210     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2211   } else if (str == SUBSET_NONZERO_PATTERN) {
2212     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2213 
2214     x = (Mat_SeqAIJ *)xx->B->data;
2215     y = (Mat_SeqAIJ *)yy->B->data;
2216     if (y->xtoy && y->XtoY != xx->B) {
2217       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2218       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2219     }
2220     if (!y->xtoy) { /* get xtoy */
2221       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2222       y->XtoY = xx->B;
2223       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2224     }
2225     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2226   } else {
2227     Mat B;
2228     PetscInt *nnz_d,*nnz_o;
2229     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2230     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2231     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2232     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2233     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2234     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2235     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2236     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2237     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2238     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2239     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2240     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2241     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2242   }
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2247 
2248 #undef __FUNCT__
2249 #define __FUNCT__ "MatConjugate_MPIAIJ"
2250 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2251 {
2252 #if defined(PETSC_USE_COMPLEX)
2253   PetscErrorCode ierr;
2254   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2255 
2256   PetscFunctionBegin;
2257   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2258   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2259 #else
2260   PetscFunctionBegin;
2261 #endif
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatRealPart_MPIAIJ"
2267 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2268 {
2269   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2270   PetscErrorCode ierr;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2274   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2280 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2287   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #ifdef PETSC_HAVE_PBGL
2292 
2293 #include <boost/parallel/mpi/bsp_process_group.hpp>
2294 #include <boost/graph/distributed/ilu_default_graph.hpp>
2295 #include <boost/graph/distributed/ilu_0_block.hpp>
2296 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2297 #include <boost/graph/distributed/petsc/interface.hpp>
2298 #include <boost/multi_array.hpp>
2299 #include <boost/parallel/distributed_property_map.hpp>
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2303 /*
2304   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2305 */
2306 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2307 {
2308   namespace petsc = boost::distributed::petsc;
2309 
2310   namespace graph_dist = boost::graph::distributed;
2311   using boost::graph::distributed::ilu_default::process_group_type;
2312   using boost::graph::ilu_permuted;
2313 
2314   PetscBool       row_identity, col_identity;
2315   PetscContainer  c;
2316   PetscInt        m, n, M, N;
2317   PetscErrorCode  ierr;
2318 
2319   PetscFunctionBegin;
2320   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2321   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2322   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2323   if (!row_identity || !col_identity) {
2324     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2325   }
2326 
2327   process_group_type pg;
2328   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2329   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2330   lgraph_type&   level_graph = *lgraph_p;
2331   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2332 
2333   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2334   ilu_permuted(level_graph);
2335 
2336   /* put together the new matrix */
2337   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2338   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2339   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2340   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2341   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2342   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344 
2345   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2346   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2347   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2348   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2354 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2355 {
2356   PetscFunctionBegin;
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatSolve_MPIAIJ"
2362 /*
2363   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2364 */
2365 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2366 {
2367   namespace graph_dist = boost::graph::distributed;
2368 
2369   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2370   lgraph_type*   lgraph_p;
2371   PetscContainer c;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2376   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2377   ierr = VecCopy(b, x);CHKERRQ(ierr);
2378 
2379   PetscScalar* array_x;
2380   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2381   PetscInt sx;
2382   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2383 
2384   PetscScalar* array_b;
2385   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2386   PetscInt sb;
2387   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2388 
2389   lgraph_type&   level_graph = *lgraph_p;
2390   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2391 
2392   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2393   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2394                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2395 
2396   typedef boost::iterator_property_map<array_ref_type::iterator,
2397                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2398   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2399                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2400 
2401   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2402 
2403   PetscFunctionReturn(0);
2404 }
2405 #endif
2406 
2407 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2408   PetscInt       nzlocal,nsends,nrecvs;
2409   PetscMPIInt    *send_rank,*recv_rank;
2410   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2411   PetscScalar    *sbuf_a,**rbuf_a;
2412   PetscErrorCode (*Destroy)(Mat);
2413 } Mat_Redundant;
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2417 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2418 {
2419   PetscErrorCode       ierr;
2420   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2421   PetscInt             i;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2425   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2426   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2427   for (i=0; i<redund->nrecvs; i++){
2428     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2429     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2430   }
2431   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2432   ierr = PetscFree(redund);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatDestroy_MatRedundant"
2438 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2439 {
2440   PetscErrorCode  ierr;
2441   PetscContainer  container;
2442   Mat_Redundant   *redund=PETSC_NULL;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2446   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2447   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2448   A->ops->destroy = redund->Destroy;
2449   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2450   if (A->ops->destroy) {
2451     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2452   }
2453   PetscFunctionReturn(0);
2454 }
2455 
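/*
   MatGetRedundantMatrix_MPIAIJ - gathers the whole parallel matrix onto each
   subcommunicator so that every subcommunicator holds a redundant copy; the send and
   receive buffers are cached in a Mat_Redundant container so MAT_REUSE_MATRIX only
   re-sends the numerical values.

   A usage sketch (nsubcomm, subcomm and mlocal_sub are assumptions that must be
   consistent with how the subcommunicators were created, e.g. by PetscSubcommCreate()):

     Mat Cred;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
*/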
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2458 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2459 {
2460   PetscMPIInt    rank,size;
2461   MPI_Comm       comm=((PetscObject)mat)->comm;
2462   PetscErrorCode ierr;
2463   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2464   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2465   PetscInt       *rowrange=mat->rmap->range;
2466   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2467   Mat            A=aij->A,B=aij->B,C=*matredundant;
2468   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2469   PetscScalar    *sbuf_a;
2470   PetscInt       nzlocal=a->nz+b->nz;
2471   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2472   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2473   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2474   MatScalar      *aworkA,*aworkB;
2475   PetscScalar    *vals;
2476   PetscMPIInt    tag1,tag2,tag3,imdex;
2477   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2478                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2479   MPI_Status     recv_status,*send_status;
2480   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2481   PetscInt       **rbuf_j=PETSC_NULL;
2482   PetscScalar    **rbuf_a=PETSC_NULL;
2483   Mat_Redundant  *redund=PETSC_NULL;
2484   PetscContainer container;
2485 
2486   PetscFunctionBegin;
2487   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2488   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2489 
2490   if (reuse == MAT_REUSE_MATRIX) {
2491     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2492     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2493     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2494     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2495     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2496     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2497     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2498     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2499 
2500     nsends    = redund->nsends;
2501     nrecvs    = redund->nrecvs;
2502     send_rank = redund->send_rank;
2503     recv_rank = redund->recv_rank;
2504     sbuf_nz   = redund->sbuf_nz;
2505     rbuf_nz   = redund->rbuf_nz;
2506     sbuf_j    = redund->sbuf_j;
2507     sbuf_a    = redund->sbuf_a;
2508     rbuf_j    = redund->rbuf_j;
2509     rbuf_a    = redund->rbuf_a;
2510   }
2511 
2512   if (reuse == MAT_INITIAL_MATRIX){
2513     PetscMPIInt  subrank,subsize;
2514     PetscInt     nleftover,np_subcomm;
2515     /* get the destination processors' id send_rank, nsends and nrecvs */
2516     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2517     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2518     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2519     np_subcomm = size/nsubcomm;
2520     nleftover  = size - nsubcomm*np_subcomm;
2521     nsends = 0; nrecvs = 0;
2522     for (i=0; i<size; i++){ /* i = rank */
2523       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2524         send_rank[nsends] = i; nsends++;
2525         recv_rank[nrecvs++] = i;
2526       }
2527     }
2528     if (rank >= size - nleftover){/* this proc is a leftover processor */
2529       i = size-nleftover-1;
2530       j = 0;
2531       while (j < nsubcomm - nleftover){
2532         send_rank[nsends++] = i;
2533         i--; j++;
2534       }
2535     }
2536 
2537     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2538       for (i=0; i<nleftover; i++){
2539         recv_rank[nrecvs++] = size-nleftover+i;
2540       }
2541     }
2542 
2543     /* allocate sbuf_j, sbuf_a */
2544     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2545     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2546     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2547   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2548 
2549   /* copy mat's local entries into the buffers */
2550   if (reuse == MAT_INITIAL_MATRIX){
2551     rownz_max = 0;
2552     rptr = sbuf_j;
2553     cols = sbuf_j + rend-rstart + 1;
2554     vals = sbuf_a;
2555     rptr[0] = 0;
2556     for (i=0; i<rend-rstart; i++){
2557       row = i + rstart;
2558       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2559       ncols  = nzA + nzB;
2560       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2561       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2562       /* load the column indices for this row into cols */
2563       lwrite = 0;
2564       for (l=0; l<nzB; l++) {
2565         if ((ctmp = bmap[cworkB[l]]) < cstart){
2566           vals[lwrite]   = aworkB[l];
2567           cols[lwrite++] = ctmp;
2568         }
2569       }
2570       for (l=0; l<nzA; l++){
2571         vals[lwrite]   = aworkA[l];
2572         cols[lwrite++] = cstart + cworkA[l];
2573       }
2574       for (l=0; l<nzB; l++) {
2575         if ((ctmp = bmap[cworkB[l]]) >= cend){
2576           vals[lwrite]   = aworkB[l];
2577           cols[lwrite++] = ctmp;
2578         }
2579       }
2580       vals += ncols;
2581       cols += ncols;
2582       rptr[i+1] = rptr[i] + ncols;
2583       if (rownz_max < ncols) rownz_max = ncols;
2584     }
2585     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2586   } else { /* only copy matrix values into sbuf_a */
2587     rptr = sbuf_j;
2588     vals = sbuf_a;
2589     rptr[0] = 0;
2590     for (i=0; i<rend-rstart; i++){
2591       row = i + rstart;
2592       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2593       ncols  = nzA + nzB;
2594       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2595       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2596       lwrite = 0;
2597       for (l=0; l<nzB; l++) {
2598         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2599       }
2600       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2601       for (l=0; l<nzB; l++) {
2602         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2603       }
2604       vals += ncols;
2605       rptr[i+1] = rptr[i] + ncols;
2606     }
2607   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2608 
2609   /* send nzlocal to others, and recv other's nzlocal */
2610   /*--------------------------------------------------*/
2611   if (reuse == MAT_INITIAL_MATRIX){
2612     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2613     s_waits2 = s_waits3 + nsends;
2614     s_waits1 = s_waits2 + nsends;
2615     r_waits1 = s_waits1 + nsends;
2616     r_waits2 = r_waits1 + nrecvs;
2617     r_waits3 = r_waits2 + nrecvs;
2618   } else {
2619     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2620     r_waits3 = s_waits3 + nsends;
2621   }
2622 
2623   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2624   if (reuse == MAT_INITIAL_MATRIX){
2625     /* get new tags to keep the communication clean */
2626     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2627     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2628     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2629 
2630     /* post receives of other's nzlocal */
2631     for (i=0; i<nrecvs; i++){
2632       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2633     }
2634     /* send nzlocal to others */
2635     for (i=0; i<nsends; i++){
2636       sbuf_nz[i] = nzlocal;
2637       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2638     }
2639     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2640     count = nrecvs;
2641     while (count) {
2642       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2643       recv_rank[imdex] = recv_status.MPI_SOURCE;
2644       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2645       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2646 
2647       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2648       rbuf_nz[imdex] += i + 2;
2649       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2650       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2651       count--;
2652     }
2653     /* wait on sends of nzlocal */
2654     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2655     /* send mat->i,j to others, and recv others' mat->i,j */
2656     /*------------------------------------------------*/
2657     for (i=0; i<nsends; i++){
2658       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2659       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2660     }
2661     /* wait on receives of mat->i,j */
2662     /*------------------------------*/
2663     count = nrecvs;
2664     while (count) {
2665       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2666       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2667       count--;
2668     }
2669     /* wait on sends of mat->i,j */
2670     /*---------------------------*/
2671     if (nsends) {
2672       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2673     }
2674   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2675 
2676   /* post receives, send and receive mat->a */
2677   /*----------------------------------------*/
2678   for (imdex=0; imdex<nrecvs; imdex++) {
2679     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2680   }
2681   for (i=0; i<nsends; i++){
2682     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2683   }
2684   count = nrecvs;
2685   while (count) {
2686     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2687     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2688     count--;
2689   }
2690   if (nsends) {
2691     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2692   }
2693 
2694   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2695 
2696   /* create redundant matrix */
2697   /*-------------------------*/
2698   if (reuse == MAT_INITIAL_MATRIX){
2699     /* compute rownz_max for preallocation */
2700     for (imdex=0; imdex<nrecvs; imdex++){
2701       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2702       rptr = rbuf_j[imdex];
2703       for (i=0; i<j; i++){
2704         ncols = rptr[i+1] - rptr[i];
2705         if (rownz_max < ncols) rownz_max = ncols;
2706       }
2707     }
2708 
2709     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2710     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2711     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2712     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2713     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2714   } else {
2715     C = *matredundant;
2716   }
2717 
2718   /* insert local matrix entries */
2719   rptr = sbuf_j;
2720   cols = sbuf_j + rend-rstart + 1;
2721   vals = sbuf_a;
2722   for (i=0; i<rend-rstart; i++){
2723     row   = i + rstart;
2724     ncols = rptr[i+1] - rptr[i];
2725     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2726     vals += ncols;
2727     cols += ncols;
2728   }
2729   /* insert received matrix entries */
2730   for (imdex=0; imdex<nrecvs; imdex++){
2731     rstart = rowrange[recv_rank[imdex]];
2732     rend   = rowrange[recv_rank[imdex]+1];
2733     rptr = rbuf_j[imdex];
2734     cols = rbuf_j[imdex] + rend-rstart + 1;
2735     vals = rbuf_a[imdex];
2736     for (i=0; i<rend-rstart; i++){
2737       row   = i + rstart;
2738       ncols = rptr[i+1] - rptr[i];
2739       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2740       vals += ncols;
2741       cols += ncols;
2742     }
2743   }
2744   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2745   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2746   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2747   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2748   if (reuse == MAT_INITIAL_MATRIX) {
2749     PetscContainer container;
2750     *matredundant = C;
2751     /* create a supporting struct and attach it to C for reuse */
2752     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2753     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2754     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2755     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2756     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2757     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2758 
2759     redund->nzlocal = nzlocal;
2760     redund->nsends  = nsends;
2761     redund->nrecvs  = nrecvs;
2762     redund->send_rank = send_rank;
2763     redund->recv_rank = recv_rank;
2764     redund->sbuf_nz = sbuf_nz;
2765     redund->rbuf_nz = rbuf_nz;
2766     redund->sbuf_j  = sbuf_j;
2767     redund->sbuf_a  = sbuf_a;
2768     redund->rbuf_j  = rbuf_j;
2769     redund->rbuf_a  = rbuf_a;
2770 
2771     redund->Destroy = C->ops->destroy;
2772     C->ops->destroy = MatDestroy_MatRedundant;
2773   }
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2779 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2780 {
2781   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2782   PetscErrorCode ierr;
2783   PetscInt       i,*idxb = 0;
2784   PetscScalar    *va,*vb;
2785   Vec            vtmp;
2786 
2787   PetscFunctionBegin;
2788   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2789   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2790   if (idx) {
2791     for (i=0; i<A->rmap->n; i++) {
2792       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2793     }
2794   }
2795 
2796   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2797   if (idx) {
2798     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2799   }
2800   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2801   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2802 
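  /* merge: v currently holds the per-row max |entry| of the diagonal block; replace it where the
     off-diagonal block has a larger magnitude, translating B's local column index to a global
     column index through garray */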
2803   for (i=0; i<A->rmap->n; i++){
2804     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2805       va[i] = vb[i];
2806       if (idx) idx[i] = a->garray[idxb[i]];
2807     }
2808   }
2809 
2810   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2811   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2812   ierr = PetscFree(idxb);CHKERRQ(ierr);
2813   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2819 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2820 {
2821   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2822   PetscErrorCode ierr;
2823   PetscInt       i,*idxb = 0;
2824   PetscScalar    *va,*vb;
2825   Vec            vtmp;
2826 
2827   PetscFunctionBegin;
2828   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2829   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2830   if (idx) {
2831     for (i=0; i<A->rmap->n; i++) {
2832       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2833     }
2834   }
2835 
2836   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2837   if (idx) {
2838     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2839   }
2840   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2841   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2842 
2843   for (i=0; i<A->rmap->n; i++){
2844     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2845       va[i] = vb[i];
2846       if (idx) idx[i] = a->garray[idxb[i]];
2847     }
2848   }
2849 
2850   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2851   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2852   ierr = PetscFree(idxb);CHKERRQ(ierr);
2853   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2859 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2860 {
2861   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2862   PetscInt       n      = A->rmap->n;
2863   PetscInt       cstart = A->cmap->rstart;
2864   PetscInt      *cmap   = mat->garray;
2865   PetscInt      *diagIdx, *offdiagIdx;
2866   Vec            diagV, offdiagV;
2867   PetscScalar   *a, *diagA, *offdiagA;
2868   PetscInt       r;
2869   PetscErrorCode ierr;
2870 
2871   PetscFunctionBegin;
2872   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2873   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2874   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2875   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2876   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2877   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2878   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2879   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2880   for(r = 0; r < n; ++r) {
2881     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2882       a[r]   = diagA[r];
2883       idx[r] = cstart + diagIdx[r];
2884     } else {
2885       a[r]   = offdiagA[r];
2886       idx[r] = cmap[offdiagIdx[r]];
2887     }
2888   }
2889   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2890   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2892   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2893   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2894   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 #undef __FUNCT__
2899 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2900 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2901 {
2902   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2903   PetscInt       n      = A->rmap->n;
2904   PetscInt       cstart = A->cmap->rstart;
2905   PetscInt      *cmap   = mat->garray;
2906   PetscInt      *diagIdx, *offdiagIdx;
2907   Vec            diagV, offdiagV;
2908   PetscScalar   *a, *diagA, *offdiagA;
2909   PetscInt       r;
2910   PetscErrorCode ierr;
2911 
2912   PetscFunctionBegin;
2913   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2914   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2915   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2916   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2917   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2918   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2919   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2920   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2921   for(r = 0; r < n; ++r) {
2922     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2923       a[r]   = diagA[r];
2924       idx[r] = cstart + diagIdx[r];
2925     } else {
2926       a[r]   = offdiagA[r];
2927       idx[r] = cmap[offdiagIdx[r]];
2928     }
2929   }
2930   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2931   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2932   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2933   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2934   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2935   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 #undef __FUNCT__
2940 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2941 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2942 {
2943   PetscErrorCode ierr;
2944   Mat            *dummy;
2945 
2946   PetscFunctionBegin;
2947   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2948   *newmat = *dummy;
2949   ierr = PetscFree(dummy);CHKERRQ(ierr);
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2957 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2958 {
2959   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
2963   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 
2968 /* -------------------------------------------------------------------*/
2969 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2970        MatGetRow_MPIAIJ,
2971        MatRestoreRow_MPIAIJ,
2972        MatMult_MPIAIJ,
2973 /* 4*/ MatMultAdd_MPIAIJ,
2974        MatMultTranspose_MPIAIJ,
2975        MatMultTransposeAdd_MPIAIJ,
2976 #ifdef PETSC_HAVE_PBGL
2977        MatSolve_MPIAIJ,
2978 #else
2979        0,
2980 #endif
2981        0,
2982        0,
2983 /*10*/ 0,
2984        0,
2985        0,
2986        MatSOR_MPIAIJ,
2987        MatTranspose_MPIAIJ,
2988 /*15*/ MatGetInfo_MPIAIJ,
2989        MatEqual_MPIAIJ,
2990        MatGetDiagonal_MPIAIJ,
2991        MatDiagonalScale_MPIAIJ,
2992        MatNorm_MPIAIJ,
2993 /*20*/ MatAssemblyBegin_MPIAIJ,
2994        MatAssemblyEnd_MPIAIJ,
2995        MatSetOption_MPIAIJ,
2996        MatZeroEntries_MPIAIJ,
2997 /*24*/ MatZeroRows_MPIAIJ,
2998        0,
2999 #ifdef PETSC_HAVE_PBGL
3000        0,
3001 #else
3002        0,
3003 #endif
3004        0,
3005        0,
3006 /*29*/ MatSetUp_MPIAIJ,
3007 #ifdef PETSC_HAVE_PBGL
3008        0,
3009 #else
3010        0,
3011 #endif
3012        0,
3013        0,
3014        0,
3015 /*34*/ MatDuplicate_MPIAIJ,
3016        0,
3017        0,
3018        0,
3019        0,
3020 /*39*/ MatAXPY_MPIAIJ,
3021        MatGetSubMatrices_MPIAIJ,
3022        MatIncreaseOverlap_MPIAIJ,
3023        MatGetValues_MPIAIJ,
3024        MatCopy_MPIAIJ,
3025 /*44*/ MatGetRowMax_MPIAIJ,
3026        MatScale_MPIAIJ,
3027        0,
3028        0,
3029        MatZeroRowsColumns_MPIAIJ,
3030 /*49*/ MatSetBlockSize_MPIAIJ,
3031        0,
3032        0,
3033        0,
3034        0,
3035 /*54*/ MatFDColoringCreate_MPIAIJ,
3036        0,
3037        MatSetUnfactored_MPIAIJ,
3038        MatPermute_MPIAIJ,
3039        0,
3040 /*59*/ MatGetSubMatrix_MPIAIJ,
3041        MatDestroy_MPIAIJ,
3042        MatView_MPIAIJ,
3043        0,
3044        0,
3045 /*64*/ 0,
3046        0,
3047        0,
3048        0,
3049        0,
3050 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3051        MatGetRowMinAbs_MPIAIJ,
3052        0,
3053        MatSetColoring_MPIAIJ,
3054 #if defined(PETSC_HAVE_ADIC)
3055        MatSetValuesAdic_MPIAIJ,
3056 #else
3057        0,
3058 #endif
3059        MatSetValuesAdifor_MPIAIJ,
3060 /*75*/ MatFDColoringApply_AIJ,
3061        0,
3062        0,
3063        0,
3064        0,
3065 /*80*/ 0,
3066        0,
3067        0,
3068 /*83*/ MatLoad_MPIAIJ,
3069        0,
3070        0,
3071        0,
3072        0,
3073        0,
3074 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3075        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3076        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3077        MatPtAP_Basic,
3078        MatPtAPSymbolic_MPIAIJ,
3079 /*94*/ MatPtAPNumeric_MPIAIJ,
3080        0,
3081        0,
3082        0,
3083        0,
3084 /*99*/ 0,
3085        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3086        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3087        MatConjugate_MPIAIJ,
3088        0,
3089 /*104*/MatSetValuesRow_MPIAIJ,
3090        MatRealPart_MPIAIJ,
3091        MatImaginaryPart_MPIAIJ,
3092        0,
3093        0,
3094 /*109*/0,
3095        MatGetRedundantMatrix_MPIAIJ,
3096        MatGetRowMin_MPIAIJ,
3097        0,
3098        0,
3099 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3100        0,
3101        0,
3102        0,
3103        0,
3104 /*119*/0,
3105        0,
3106        0,
3107        0,
3108        MatGetMultiProcBlock_MPIAIJ,
3109 /*124*/MatFindNonZeroRows_MPIAIJ,
3110        MatGetColumnNorms_MPIAIJ,
3111        MatInvertBlockDiagonal_MPIAIJ,
3112        0,
3113        MatGetSubMatricesParallel_MPIAIJ,
3114 /*129*/0,
3115        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3116        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3117        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3118        0,
3119 /*134*/0,
3120        0,
3121        0,
3122        0,
3123        0
3124 };
3125 
3126 /* ----------------------------------------------------------------------------------------*/
3127 
3128 EXTERN_C_BEGIN
3129 #undef __FUNCT__
3130 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3131 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3132 {
3133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3134   PetscErrorCode ierr;
3135 
3136   PetscFunctionBegin;
3137   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3138   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3139   PetscFunctionReturn(0);
3140 }
3141 EXTERN_C_END
3142 
3143 EXTERN_C_BEGIN
3144 #undef __FUNCT__
3145 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3146 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3147 {
3148   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3149   PetscErrorCode ierr;
3150 
3151   PetscFunctionBegin;
3152   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3153   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3154   PetscFunctionReturn(0);
3155 }
3156 EXTERN_C_END
3157 
3158 EXTERN_C_BEGIN
3159 #undef __FUNCT__
3160 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3161 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3162 {
3163   Mat_MPIAIJ     *b;
3164   PetscErrorCode ierr;
3165   PetscInt       i;
3166   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3167 
3168   PetscFunctionBegin;
3169   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3170   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3171   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3172   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3173   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3174   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3175 
3176   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3177   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3178   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3180   if (d_nnz) {
3181     for (i=0; i<B->rmap->n; i++) {
3182       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3183     }
3184   }
3185   if (o_nnz) {
3186     for (i=0; i<B->rmap->n; i++) {
3187       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3188     }
3189   }
3190   b = (Mat_MPIAIJ*)B->data;
3191 
3192   if (!B->preallocated) {
3193     /* Explicitly create 2 MATSEQAIJ matrices. */
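    /* b->A is the diagonal block (B->rmap->n x B->cmap->n); b->B is the off-diagonal block,
       created here with the full global column width B->cmap->N */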
3194     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3195     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3196     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3197     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3198     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3199     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3200     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3201     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3202   }
3203 
3204   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3205   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3206   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3207   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3208   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3209   B->preallocated = PETSC_TRUE;
3210   PetscFunctionReturn(0);
3211 }
3212 EXTERN_C_END
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3216 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3217 {
3218   Mat            mat;
3219   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   *newmat       = 0;
3224   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3225   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3226   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3227   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3228   a    = (Mat_MPIAIJ*)mat->data;
3229 
3230   mat->factortype    = matin->factortype;
3231   mat->rmap->bs      = matin->rmap->bs;
3232   mat->assembled    = PETSC_TRUE;
3233   mat->insertmode   = NOT_SET_VALUES;
3234   mat->preallocated = PETSC_TRUE;
3235 
3236   a->size           = oldmat->size;
3237   a->rank           = oldmat->rank;
3238   a->donotstash     = oldmat->donotstash;
3239   a->roworiented    = oldmat->roworiented;
3240   a->rowindices     = 0;
3241   a->rowvalues      = 0;
3242   a->getrowactive   = PETSC_FALSE;
3243 
3244   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3245   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3246 
3247   if (oldmat->colmap) {
3248 #if defined (PETSC_USE_CTABLE)
3249     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3250 #else
3251     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3252     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3253     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3254 #endif
3255   } else a->colmap = 0;
3256   if (oldmat->garray) {
3257     PetscInt len;
3258     len  = oldmat->B->cmap->n;
3259     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3260     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3261     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3262   } else a->garray = 0;
3263 
3264   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3266   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3268   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3270   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3271   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3272   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3273   *newmat = mat;
3274   PetscFunctionReturn(0);
3275 }
3276 
3277 
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatLoad_MPIAIJ"
3281 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3282 {
3283   PetscScalar    *vals,*svals;
3284   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3285   PetscErrorCode ierr;
3286   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3287   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3288   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3289   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3290   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3291   int            fd;
3292 
3293   PetscFunctionBegin;
3294   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3295   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3296   if (!rank) {
3297     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3298     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3299     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3300   }
3301 
3302   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3303 
3304   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3305   M = header[1]; N = header[2];
3306   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3307   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3308   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3309 
3310   /* If global sizes are set, check if they are consistent with that given in the file */
3311   if (sizesset) {
3312     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3313   }
3314   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3315   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3316 
3317   /* determine ownership of all rows */
3318   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3319   else m = newMat->rmap->n; /* Set by user */
3320 
3321   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3322   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3323 
3324   /* First process needs enough room for process with most rows */
3325   if (!rank) {
3326     mmax       = rowners[1];
3327     for (i=2; i<=size; i++) {
3328       mmax = PetscMax(mmax,rowners[i]);
3329     }
3330   } else mmax = m;
3331 
3332   rowners[0] = 0;
3333   for (i=2; i<=size; i++) {
3334     rowners[i] += rowners[i-1];
3335   }
3336   rstart = rowners[rank];
3337   rend   = rowners[rank+1];
3338 
3339   /* distribute row lengths to all processors */
3340   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3341   if (!rank) {
3342     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3343     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* sized by mmax so it can stage any process's row lengths */
3344     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3345     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3346     for (j=0; j<m; j++) {
3347       procsnz[0] += ourlens[j];
3348     }
3349     for (i=1; i<size; i++) {
3350       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3351       /* calculate the number of nonzeros on each processor */
3352       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3353         procsnz[i] += rowlengths[j];
3354       }
3355       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3356     }
3357     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3358   } else {
3359     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3360   }
3361 
3362   if (!rank) {
3363     /* determine max buffer needed and allocate it */
3364     maxnz = 0;
3365     for (i=0; i<size; i++) {
3366       maxnz = PetscMax(maxnz,procsnz[i]);
3367     }
3368     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3369 
3370     /* read in my part of the matrix column indices  */
3371     nz   = procsnz[0];
3372     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3373     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3374 
3375     /* read in everyone else's and ship them off */
3376     for (i=1; i<size; i++) {
3377       nz     = procsnz[i];
3378       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3379       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3380     }
3381     ierr = PetscFree(cols);CHKERRQ(ierr);
3382   } else {
3383     /* determine buffer space needed for message */
3384     nz = 0;
3385     for (i=0; i<m; i++) {
3386       nz += ourlens[i];
3387     }
3388     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3389 
3390     /* receive message of column indices*/
3391     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3392   }
3393 
3394   /* determine column ownership if matrix is not square */
3395   if (N != M) {
3396     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3397     else n = newMat->cmap->n;
3398     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3399     cstart = cend - n;
3400   } else {
3401     cstart = rstart;
3402     cend   = rend;
3403     n      = cend - cstart;
3404   }
3405 
3406   /* loop over local rows, determining number of off diagonal entries */
3407   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3408   jj = 0;
3409   for (i=0; i<m; i++) {
3410     for (j=0; j<ourlens[i]; j++) {
3411       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3412       jj++;
3413     }
3414   }
3415 
3416   for (i=0; i<m; i++) {
3417     ourlens[i] -= offlens[i];
3418   }
3419   if (!sizesset) {
3420     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3421   }
3422   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3423 
3424   for (i=0; i<m; i++) {
3425     ourlens[i] += offlens[i];
3426   }
3427 
3428   if (!rank) {
3429     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3430 
3431     /* read in my part of the matrix numerical values  */
3432     nz   = procsnz[0];
3433     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3434 
3435     /* insert into matrix */
3436     jj      = rstart;
3437     smycols = mycols;
3438     svals   = vals;
3439     for (i=0; i<m; i++) {
3440       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3441       smycols += ourlens[i];
3442       svals   += ourlens[i];
3443       jj++;
3444     }
3445 
3446     /* read in other processors and ship out */
3447     for (i=1; i<size; i++) {
3448       nz     = procsnz[i];
3449       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3450       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3451     }
3452     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3453   } else {
3454     /* receive numeric values */
3455     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3456 
3457     /* receive message of values*/
3458     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3459 
3460     /* insert into matrix */
3461     jj      = rstart;
3462     smycols = mycols;
3463     svals   = vals;
3464     for (i=0; i<m; i++) {
3465       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3466       smycols += ourlens[i];
3467       svals   += ourlens[i];
3468       jj++;
3469     }
3470   }
3471   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3472   ierr = PetscFree(vals);CHKERRQ(ierr);
3473   ierr = PetscFree(mycols);CHKERRQ(ierr);
3474   ierr = PetscFree(rowners);CHKERRQ(ierr);
3475 
3476   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3477   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 #undef __FUNCT__
3482 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3483 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3484 {
3485   PetscErrorCode ierr;
3486   IS             iscol_local;
3487   PetscInt       csize;
3488 
3489   PetscFunctionBegin;
3490   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3491   if (call == MAT_REUSE_MATRIX) {
3492     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3493     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3494   } else {
3495     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3496   }
3497   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3498   if (call == MAT_INITIAL_MATRIX) {
3499     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3500     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3501   }
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 #undef __FUNCT__
3506 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3507 /*
3508     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3509   copy, and then the final result obtained by concatenating the local matrices.
3510   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3511 
3512   Note: This requires a sequential iscol with all indices.
3513 */
3514 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3515 {
3516   PetscErrorCode ierr;
3517   PetscMPIInt    rank,size;
3518   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3519   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3520   Mat            *local,M,Mreuse;
3521   MatScalar      *vwork,*aa;
3522   MPI_Comm       comm = ((PetscObject)mat)->comm;
3523   Mat_SeqAIJ     *aij;
3524 
3525 
3526   PetscFunctionBegin;
3527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3529 
3530   if (call ==  MAT_REUSE_MATRIX) {
3531     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3532     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3533     local = &Mreuse;
3534     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3535   } else {
3536     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3537     Mreuse = *local;
3538     ierr   = PetscFree(local);CHKERRQ(ierr);
3539   }
3540 
3541   /*
3542       m - number of local rows
3543       n - number of columns (same on all processors)
3544       rstart - first row in new global matrix generated
3545   */
3546   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3547   if (call == MAT_INITIAL_MATRIX) {
3548     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3549     ii  = aij->i;
3550     jj  = aij->j;
3551 
3552     /*
3553         Determine the number of non-zeros in the diagonal and off-diagonal
3554         portions of the matrix in order to do correct preallocation
3555     */
3556 
3557     /* first get start and end of "diagonal" columns */
3558     if (csize == PETSC_DECIDE) {
3559       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3560       if (mglobal == n) { /* square matrix */
3561         nlocal = m;
3562       } else {
3563         nlocal = n/size + ((n % size) > rank);
3564       }
3565     } else {
3566       nlocal = csize;
3567     }
3568     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3569     rstart = rend - nlocal;
3570     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3571 
3572     /* next, compute all the lengths */
3573     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3574     olens = dlens + m;
3575     for (i=0; i<m; i++) {
3576       jend = ii[i+1] - ii[i];
3577       olen = 0;
3578       dlen = 0;
3579       for (j=0; j<jend; j++) {
3580         if (*jj < rstart || *jj >= rend) olen++;
3581         else dlen++;
3582         jj++;
3583       }
3584       olens[i] = olen;
3585       dlens[i] = dlen;
3586     }
3587     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3588     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3589     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3590     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3591     ierr = PetscFree(dlens);CHKERRQ(ierr);
3592   } else {
3593     PetscInt ml,nl;
3594 
3595     M = *newmat;
3596     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3597     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3598     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3599     /*
3600          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3601        rather than the slower MatSetValues().
3602     */
3603     M->was_assembled = PETSC_TRUE;
3604     M->assembled     = PETSC_FALSE;
3605   }
3606   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3607   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3608   ii  = aij->i;
3609   jj  = aij->j;
3610   aa  = aij->a;
3611   for (i=0; i<m; i++) {
3612     row   = rstart + i;
3613     nz    = ii[i+1] - ii[i];
3614     cwork = jj;     jj += nz;
3615     vwork = aa;     aa += nz;
3616     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3617   }
3618 
3619   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3620   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3621   *newmat = M;
3622 
3623   /* save submatrix used in processor for next request */
3624   if (call ==  MAT_INITIAL_MATRIX) {
3625     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3626     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3627   }
3628 
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 EXTERN_C_BEGIN
3633 #undef __FUNCT__
3634 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3635 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3636 {
3637   PetscInt       m,cstart, cend,j,nnz,i,d;
3638   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3639   const PetscInt *JJ;
3640   PetscScalar    *values;
3641   PetscErrorCode ierr;
3642 
3643   PetscFunctionBegin;
3644   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3645 
3646   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3648   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3649   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3650   m      = B->rmap->n;
3651   cstart = B->cmap->rstart;
3652   cend   = B->cmap->rend;
3653   rstart = B->rmap->rstart;
3654 
3655   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3656 
3657 #if defined(PETSC_USE_DEBUG)
3658   for (i=0; i<m; i++) {
3659     nnz     = Ii[i+1]- Ii[i];
3660     JJ      = J + Ii[i];
3661     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3662     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3663     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3664   }
3665 #endif
3666 
3667   for (i=0; i<m; i++) {
3668     nnz     = Ii[i+1]- Ii[i];
3669     JJ      = J + Ii[i];
3670     nnz_max = PetscMax(nnz_max,nnz);
3671     d       = 0;
3672     for (j=0; j<nnz; j++) {
3673       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3674     }
3675     d_nnz[i] = d;
3676     o_nnz[i] = nnz - d;
3677   }
3678   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3679   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3680 
3681   if (v) values = (PetscScalar*)v;
3682   else {
3683     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3684     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3685   }
3686 
3687   for (i=0; i<m; i++) {
3688     ii   = i + rstart;
3689     nnz  = Ii[i+1]- Ii[i];
3690     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3691   }
3692   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3694 
3695   if (!v) {
3696     ierr = PetscFree(values);CHKERRQ(ierr);
3697   }
3698   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3699   PetscFunctionReturn(0);
3700 }
3701 EXTERN_C_END
3702 
3703 #undef __FUNCT__
3704 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3705 /*@
3706    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3707    (the default parallel PETSc format).
3708 
3709    Collective on MPI_Comm
3710 
3711    Input Parameters:
3712 +  B - the matrix
3713 .  i - the indices into j for the start of each local row (starts with zero)
3714 .  j - the column indices for each local row (starts with zero)
3715 -  v - optional values in the matrix
3716 
3717    Level: developer
3718 
3719    Notes:
3720        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3721      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3722      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3723 
3724        The i and j indices are 0-based, and the i array holds offsets into the local j array.
3725 
3726        The format used for the sparse matrix input is equivalent to a
3727     row-major ordering, i.e., for the following matrix the expected input data is
3728     as shown:
3729 
3730         1 0 0
3731         2 0 3     P0
3732        -------
3733         4 5 6     P1
3734 
3735      Process0 [P0]: rows_owned=[0,1]
3736         i =  {0,1,3}  [size = nrow+1  = 2+1]
3737         j =  {0,0,2}  [size = nz = 3]
3738         v =  {1,2,3}  [size = nz = 3]
3739 
3740      Process1 [P1]: rows_owned=[2]
3741         i =  {0,3}    [size = nrow+1  = 1+1]
3742         j =  {0,1,2}  [size = nz = 3]
3743         v =  {4,5,6}  [size = nz = 3]
3744 
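   A minimal per-process call, sketched here for process 0 of the layout above (it assumes B has
   already been created with MatCreate(), MatSetSizes() and MatSetType(B,MATMPIAIJ)):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve

   Process 1 makes the matching call with its own i, j, and v arrays.
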
3745 .keywords: matrix, aij, compressed row, sparse, parallel
3746 
3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3748           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3749 @*/
3750 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3751 {
3752   PetscErrorCode ierr;
3753 
3754   PetscFunctionBegin;
3755   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3761 /*@C
3762    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3763    (the default parallel PETSc format).  For good matrix assembly performance
3764    the user should preallocate the matrix storage by setting the parameters
3765    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3766    performance can be increased by more than a factor of 50.
3767 
3768    Collective on MPI_Comm
3769 
3770    Input Parameters:
3771 +  A - the matrix
3772 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3773            (same value is used for all local rows)
3774 .  d_nnz - array containing the number of nonzeros in the various rows of the
3775            DIAGONAL portion of the local submatrix (possibly different for each row)
3776            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3777            The size of this array is equal to the number of local rows, i.e 'm'.
3778            For matrices that will be factored, you must leave room for (and set)
3779            the diagonal entry even if it is zero.
3780 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3781            submatrix (same value is used for all local rows).
3782 -  o_nnz - array containing the number of nonzeros in the various rows of the
3783            OFF-DIAGONAL portion of the local submatrix (possibly different for
3784            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3785            structure. The size of this array is equal to the number
3786            of local rows, i.e 'm'.
3787 
3788    If the *_nnz parameter is given then the *_nz parameter is ignored
3789 
3790    The AIJ format (also called the Yale sparse matrix format or
3791    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3792    storage.  The stored row and column indices begin with zero.
3793    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3794 
3795    The parallel matrix is partitioned such that the first m0 rows belong to
3796    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3797    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3798 
3799    The DIAGONAL portion of the local submatrix of a processor can be defined
3800    as the submatrix which is obtained by extracting the part corresponding to
3801    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3802    first row that belongs to the processor, r2 is the last row belonging to
3803    this processor, and c1-c2 is the range of indices of the local part of a
3804    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3805    common case of a square matrix, the row and column ranges are the same and
3806    the DIAGONAL part is also square. The remaining portion of the local
3807    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3808 
3809    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3810 
3811    You can call MatGetInfo() to get information on how effective the preallocation was;
3812    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3813    You can also run with the option -info and look for messages with the string
3814    malloc in them to see if additional memory allocation was needed.
3815 
3816    Example usage:
3817 
3818    Consider the following 8x8 matrix with 34 non-zero values, that is
3819    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3820    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3821    as follows:
3822 
3823 .vb
3824             1  2  0  |  0  3  0  |  0  4
3825     Proc0   0  5  6  |  7  0  0  |  8  0
3826             9  0 10  | 11  0  0  | 12  0
3827     -------------------------------------
3828            13  0 14  | 15 16 17  |  0  0
3829     Proc1   0 18  0  | 19 20 21  |  0  0
3830             0  0  0  | 22 23  0  | 24  0
3831     -------------------------------------
3832     Proc2  25 26 27  |  0  0 28  | 29  0
3833            30  0  0  | 31 32 33  |  0 34
3834 .ve
3835 
3836    This can be represented as a collection of submatrices as:
3837 
3838 .vb
3839       A B C
3840       D E F
3841       G H I
3842 .ve
3843 
3844    Where the submatrices A,B,C are owned by proc0, D,E,F are
3845    owned by proc1, G,H,I are owned by proc2.
3846 
3847    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3848    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3849    The 'M','N' parameters are 8,8, and have the same values on all procs.
3850 
3851    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3852    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3853    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3854    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3855    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3856    matrix, and [DF] as another SeqAIJ matrix.
3857 
3858    When d_nz, o_nz parameters are specified, d_nz storage elements are
3859    allocated for every row of the local diagonal submatrix, and o_nz
3860    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3861    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3862    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3863    In this case, the values of d_nz,o_nz are:
3864 .vb
3865      proc0 : d_nz = 2, o_nz = 2
3866      proc1 : d_nz = 3, o_nz = 2
3867      proc2 : d_nz = 1, o_nz = 4
3868 .ve
3869    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3870    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3871    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3872    34 values.
3873 
3874    When d_nnz, o_nnz parameters are specified, the storage is specified
3875    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3876    In the above case the values for d_nnz,o_nnz are:
3877 .vb
3878      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3879      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3880      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3881 .ve
3882    Here the space allocated is the sum of all the above values, i.e. 34, and
3883    hence pre-allocation is perfect.
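
   As a sketch, proc0 in the example above could preallocate exactly with the d_nnz/o_nnz form
   (assuming A has already been given its sizes and the type MATMPIAIJ):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve

   or, with a single estimate per row, MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL).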
3884 
3885    Level: intermediate
3886 
3887 .keywords: matrix, aij, compressed row, sparse, parallel
3888 
3889 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3890           MPIAIJ, MatGetInfo()
3891 @*/
3892 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3893 {
3894   PetscErrorCode ierr;
3895 
3896   PetscFunctionBegin;
3897   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3898   PetscValidType(B,1);
3899   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3900   PetscFunctionReturn(0);
3901 }
3902 
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3905 /*@
3906      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3907          CSR format the local rows.
3908 
3909    Collective on MPI_Comm
3910 
3911    Input Parameters:
3912 +  comm - MPI communicator
3913 .  m - number of local rows (Cannot be PETSC_DECIDE)
3914 .  n - This value should be the same as the local size used in creating the
3915        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3916        calculated if N is given) For square matrices n is almost always m.
3917 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3918 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3919 .   i - row indices
3920 .   j - column indices
3921 -   a - matrix values
3922 
3923    Output Parameter:
3924 .   mat - the matrix
3925 
3926    Level: intermediate
3927 
3928    Notes:
3929        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3930      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3931      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3932 
3933        The i and j indices are 0-based, and the i array holds offsets into the local j array.
3934 
3935        The format used for the sparse matrix input is equivalent to a
3936     row-major ordering, i.e., for the following matrix the expected input data is
3937     as shown:
3938 
3939         1 0 0
3940         2 0 3     P0
3941        -------
3942         4 5 6     P1
3943 
3944      Process0 [P0]: rows_owned=[0,1]
3945         i =  {0,1,3}  [size = nrow+1  = 2+1]
3946         j =  {0,0,2}  [size = nz = 3]
3947         v =  {1,2,3}  [size = nz = 3]
3948 
3949      Process1 [P1]: rows_owned=[2]
3950         i =  {0,3}    [size = nrow+1  = 1+1]
3951         j =  {0,1,2}  [size = nz = 3]
3952         v =  {4,5,6}  [size = nz = 3]
3953 
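     As a sketch, process 0 in the layout above could supply its two local rows with the call
     below (process 1 makes the matching call with m = 1 and its own arrays):

.vb
     Mat         mat;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&mat);CHKERRQ(ierr);
.ve
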
3954 .keywords: matrix, aij, compressed row, sparse, parallel
3955 
3956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3957           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3958 @*/
3959 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3960 {
3961   PetscErrorCode ierr;
3962 
3963   PetscFunctionBegin;
3964   if (i[0]) {
3965     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3966   }
3967   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3968   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3969   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3970   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3971   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3972   PetscFunctionReturn(0);
3973 }
3974 
3975 #undef __FUNCT__
3976 #define __FUNCT__ "MatCreateMPIAIJ"
3977 /*@C
3978    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3979    (the default parallel PETSc format).  For good matrix assembly performance
3980    the user should preallocate the matrix storage by setting the parameters
3981    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3982    performance can be increased by more than a factor of 50.
3983 
3984    Collective on MPI_Comm
3985 
3986    Input Parameters:
3987 +  comm - MPI communicator
3988 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3989            This value should be the same as the local size used in creating the
3990            y vector for the matrix-vector product y = Ax.
3991 .  n - This value should be the same as the local size used in creating the
3992        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3993        calculated if N is given) For square matrices n is almost always m.
3994 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3995 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3996 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3997            (same value is used for all local rows)
3998 .  d_nnz - array containing the number of nonzeros in the various rows of the
3999            DIAGONAL portion of the local submatrix (possibly different for each row)
4000            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4001            The size of this array is equal to the number of local rows, i.e 'm'.
4002 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4003            submatrix (same value is used for all local rows).
4004 -  o_nnz - array containing the number of nonzeros in the various rows of the
4005            OFF-DIAGONAL portion of the local submatrix (possibly different for
4006            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4007            structure. The size of this array is equal to the number
4008            of local rows, i.e., 'm'.
4009 
4010    Output Parameter:
4011 .  A - the matrix
4012 
4013    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4014    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4015    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4016 
4017    Notes:
4018    If the *_nnz parameter is given then the *_nz parameter is ignored
4019 
4020    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4021    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4022    storage requirements for this matrix.
4023 
4024    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4025    processor then it must be used on all processors that share the object for
4026    that argument.
4027 
4028    The user MUST specify either the local or global matrix dimensions
4029    (possibly both).
4030 
4031    The parallel matrix is partitioned across processors such that the
4032    first m0 rows belong to process 0, the next m1 rows belong to
4033    process 1, the next m2 rows belong to process 2 etc., where
4034    m0,m1,m2,.. are the input parameter 'm'; i.e., each processor stores
4035    values corresponding to an [m x N] submatrix.
4036 
4037    The columns are logically partitioned with the n0 columns belonging
4038    to the 0th partition, the next n1 columns belonging to the next
4039    partition etc., where n0,n1,n2... are the input parameter 'n'.
4040 
4041    The DIAGONAL portion of the local submatrix on any given processor
4042    is the submatrix corresponding to the rows and columns m,n owned by
4043    the given processor; i.e., the diagonal submatrix on
4044    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4045    etc. The remaining portion of the local submatrix [m x (N-n)]
4046    constitutes the OFF-DIAGONAL portion. The example below better
4047    illustrates this concept.
4048 
4049    For a square global matrix we define each processor's diagonal portion
4050    to be its local rows and the corresponding columns (a square submatrix);
4051    each processor's off-diagonal portion encompasses the remainder of the
4052    local matrix (a rectangular submatrix).
4053 
4054    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4055    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4056    When calling this routine with a single process communicator, a matrix of
4057    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4058    type of communicator, use the construction mechanism:
4059      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4060 
4061    By default, this format uses inodes (identical nodes) when possible.
4062    We search for consecutive rows with the same nonzero structure, thereby
4063    reusing matrix information to achieve increased efficiency.
4064 
4065    Options Database Keys:
4066 +  -mat_no_inode  - Do not use inodes
4067 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4068 -  -mat_aij_oneindex - Internally use indexing starting at 1
4069         rather than 0.  Note that when calling MatSetValues(),
4070         the user still MUST index entries starting at 0!
4071 
4072 
4073    Example usage:
4074 
4075    Consider the following 8x8 matrix with 34 non-zero values that is
4076    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4077    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4078    as follows:
4079 
4080 .vb
4081             1  2  0  |  0  3  0  |  0  4
4082     Proc0   0  5  6  |  7  0  0  |  8  0
4083             9  0 10  | 11  0  0  | 12  0
4084     -------------------------------------
4085            13  0 14  | 15 16 17  |  0  0
4086     Proc1   0 18  0  | 19 20 21  |  0  0
4087             0  0  0  | 22 23  0  | 24  0
4088     -------------------------------------
4089     Proc2  25 26 27  |  0  0 28  | 29  0
4090            30  0  0  | 31 32 33  |  0 34
4091 .ve
4092 
4093    This can be represented as a collection of submatrices as:
4094 
4095 .vb
4096       A B C
4097       D E F
4098       G H I
4099 .ve
4100 
4101    Where the submatrices A,B,C are owned by proc0, D,E,F are
4102    owned by proc1, G,H,I are owned by proc2.
4103 
4104    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4106    The 'M','N' parameters are 8,8, and have the same values on all procs.
4107 
4108    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4109    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4110    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4111    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4112    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4113    matrix, and [DF] as another SeqAIJ matrix.
4114 
4115    When d_nz, o_nz parameters are specified, d_nz storage elements are
4116    allocated for every row of the local diagonal submatrix, and o_nz
4117    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4118    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4119    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4120    In this case, the values of d_nz,o_nz are:
4121 .vb
4122      proc0 : d_nz = 2, o_nz = 2
4123      proc1 : d_nz = 3, o_nz = 2
4124      proc2 : d_nz = 1, o_nz = 4
4125 .ve
4126    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4127    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4128    for proc2; i.e., we are using 12+15+10=37 storage locations to store
4129    34 values.
4130 
4131    When d_nnz, o_nnz parameters are specified, the storage is specified
4132    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4133    In the above case the values for d_nnz,o_nnz are:
4134 .vb
4135      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4136      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4137      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4138 .ve
4139    Here the space allocated is the sum of all the above values, i.e., 34, and
4140    hence pre-allocation is perfect.
4141 
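   For instance, with the d_nnz/o_nnz values above, the (collective) call on process 1
   would look as follows (a sketch; the variable name A is illustrative and error
   handling is abbreviated):

.vb
   PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
   Mat      A;
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
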
4142    Level: intermediate
4143 
4144 .keywords: matrix, aij, compressed row, sparse, parallel
4145 
4146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4147           MPIAIJ, MatCreateMPIAIJWithArrays()
4148 @*/
4149 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4150 {
4151   PetscErrorCode ierr;
4152   PetscMPIInt    size;
4153 
4154   PetscFunctionBegin;
4155   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4156   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4157   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4158   if (size > 1) {
4159     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4160     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4161   } else {
4162     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4163     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4164   }
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 #undef __FUNCT__
4169 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4170 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4171 {
4172   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4173 
4174   PetscFunctionBegin;
4175   *Ad     = a->A;
4176   *Ao     = a->B;
4177   *colmap = a->garray;
4178   PetscFunctionReturn(0);
4179 }
4180 
4181 #undef __FUNCT__
4182 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4183 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4184 {
4185   PetscErrorCode ierr;
4186   PetscInt       i;
4187   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4188 
4189   PetscFunctionBegin;
4190   if (coloring->ctype == IS_COLORING_GLOBAL) {
4191     ISColoringValue *allcolors,*colors;
4192     ISColoring      ocoloring;
4193 
4194     /* set coloring for diagonal portion */
4195     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4196 
4197     /* set coloring for off-diagonal portion */
4198     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4199     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4200     for (i=0; i<a->B->cmap->n; i++) {
4201       colors[i] = allcolors[a->garray[i]];
4202     }
4203     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4204     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4205     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4206     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4207   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4208     ISColoringValue *colors;
4209     PetscInt        *larray;
4210     ISColoring      ocoloring;
4211 
4212     /* set coloring for diagonal portion */
4213     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4214     for (i=0; i<a->A->cmap->n; i++) {
4215       larray[i] = i + A->cmap->rstart;
4216     }
4217     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4218     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4219     for (i=0; i<a->A->cmap->n; i++) {
4220       colors[i] = coloring->colors[larray[i]];
4221     }
4222     ierr = PetscFree(larray);CHKERRQ(ierr);
4223     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4224     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4225     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4226 
4227     /* set coloring for off-diagonal portion */
4228     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4229     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4230     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4231     for (i=0; i<a->B->cmap->n; i++) {
4232       colors[i] = coloring->colors[larray[i]];
4233     }
4234     ierr = PetscFree(larray);CHKERRQ(ierr);
4235     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4236     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4237     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4238   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4239 
4240   PetscFunctionReturn(0);
4241 }
4242 
4243 #if defined(PETSC_HAVE_ADIC)
4244 #undef __FUNCT__
4245 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4246 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4247 {
4248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4249   PetscErrorCode ierr;
4250 
4251   PetscFunctionBegin;
4252   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4253   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4254   PetscFunctionReturn(0);
4255 }
4256 #endif
4257 
4258 #undef __FUNCT__
4259 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4260 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4261 {
4262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4263   PetscErrorCode ierr;
4264 
4265   PetscFunctionBegin;
4266   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4267   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4268   PetscFunctionReturn(0);
4269 }
4270 
4271 #undef __FUNCT__
4272 #define __FUNCT__ "MatMergeSymbolic"
4273 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4274 {
4275   PetscErrorCode ierr;
4276   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum;
4277   PetscInt       *indx;
4278 
4279   PetscFunctionBegin;
4280   /* This routine will ONLY return MPIAIJ type matrix */
4281   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4282   if (n == PETSC_DECIDE){
4283     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4284   }
4285   /* Check sum(n) = N */
4286   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPIU_SUM,comm);CHKERRQ(ierr);
4287   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4288 
4289   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4290   rstart -= m;
4291 
4292   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4293   for (i=0;i<m;i++) {
4294     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4295     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4296     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4297   }
4298 
4299   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4300   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4301   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4302   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4303   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4304   PetscFunctionReturn(0);
4305 }
4306 
4307 #undef __FUNCT__
4308 #define __FUNCT__ "MatMergeNumeric"
4309 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4310 {
4311   PetscErrorCode ierr;
4312   PetscInt       m,N,i,rstart,nnz,Ii;
4313   PetscInt       *indx;
4314   PetscScalar    *values;
4315 
4316   PetscFunctionBegin;
4317   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4318   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4319   for (i=0;i<m;i++) {
4320     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4321     Ii    = i + rstart;
4322     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4323     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4324   }
4325   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4326   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4327   PetscFunctionReturn(0);
4328 }
4329 
4330 #undef __FUNCT__
4331 #define __FUNCT__ "MatMerge"
4332 /*@
4333       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4334                  matrices from each processor
4335 
4336     Collective on MPI_Comm
4337 
4338    Input Parameters:
4339 +    comm - the communicator the parallel matrix will live on
4340 .    inmat - the input sequential matrix (one per process)
4341 .    n - number of local columns (or PETSC_DECIDE)
4342 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4343 
4344    Output Parameter:
4345 .    outmat - the parallel matrix generated
4346 
4347     Level: advanced
4348 
4349    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4350 
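   Example usage (a sketch; Aloc names a SeqAIJ matrix already assembled on every process):
.vb
   Mat Aglobal;
   ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);CHKERRQ(ierr);
.ve
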
4351 @*/
4352 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4353 {
4354   PetscErrorCode ierr;
4355 
4356   PetscFunctionBegin;
4357   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4358   if (scall == MAT_INITIAL_MATRIX){
4359     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4360   }
4361   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4362   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 #undef __FUNCT__
4367 #define __FUNCT__ "MatFileSplit"
4368 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4369 {
4370   PetscErrorCode    ierr;
4371   PetscMPIInt       rank;
4372   PetscInt          m,N,i,rstart,nnz;
4373   size_t            len;
4374   const PetscInt    *indx;
4375   PetscViewer       out;
4376   char              *name;
4377   Mat               B;
4378   const PetscScalar *values;
4379 
4380   PetscFunctionBegin;
4381   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4382   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4383   /* Should this be the type of the diagonal block of A? */
4384   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4385   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4386   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4387   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4388   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4389   for (i=0;i<m;i++) {
4390     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4391     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4392     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4393   }
4394   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4395   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4396 
4397   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4398   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4399   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4400   sprintf(name,"%s.%d",outfile,rank);
4401   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4402   ierr = PetscFree(name);CHKERRQ(ierr);
4403   ierr = MatView(B,out);CHKERRQ(ierr);
4404   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4405   ierr = MatDestroy(&B);CHKERRQ(ierr);
4406   PetscFunctionReturn(0);
4407 }
4408 
4409 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4410 #undef __FUNCT__
4411 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4412 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4413 {
4414   PetscErrorCode       ierr;
4415   Mat_Merge_SeqsToMPI  *merge;
4416   PetscContainer       container;
4417 
4418   PetscFunctionBegin;
4419   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4420   if (container) {
4421     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4434     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4435     ierr = PetscFree(merge);CHKERRQ(ierr);
4436     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4437   }
4438   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4439   PetscFunctionReturn(0);
4440 }
4441 
4442 #include <../src/mat/utils/freespace.h>
4443 #include <petscbt.h>
4444 
4445 #undef __FUNCT__
4446 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4447 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4448 {
4449   PetscErrorCode       ierr;
4450   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4451   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4452   PetscMPIInt          size,rank,taga,*len_s;
4453   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4454   PetscInt             proc,m;
4455   PetscInt             **buf_ri,**buf_rj;
4456   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4457   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4458   MPI_Request          *s_waits,*r_waits;
4459   MPI_Status           *status;
4460   MatScalar            *aa=a->a;
4461   MatScalar            **abuf_r,*ba_i;
4462   Mat_Merge_SeqsToMPI  *merge;
4463   PetscContainer       container;
4464 
4465   PetscFunctionBegin;
4466   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4467 
4468   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4469   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4470 
4471   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4472   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4473 
4474   bi     = merge->bi;
4475   bj     = merge->bj;
4476   buf_ri = merge->buf_ri;
4477   buf_rj = merge->buf_rj;
4478 
4479   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4480   owners = merge->rowmap->range;
4481   len_s  = merge->len_s;
4482 
4483   /* send and recv matrix values */
4484   /*-----------------------------*/
4485   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4486   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4487 
4488   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4489   for (proc=0,k=0; proc<size; proc++){
4490     if (!len_s[proc]) continue;
4491     i = owners[proc];
4492     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4493     k++;
4494   }
4495 
4496   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4497   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4498   ierr = PetscFree(status);CHKERRQ(ierr);
4499 
4500   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4501   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4502 
4503   /* insert mat values of mpimat */
4504   /*----------------------------*/
4505   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4506   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4507 
4508   for (k=0; k<merge->nrecv; k++){
4509     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4510     nrows = *(buf_ri_k[k]);
4511     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4512     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4513   }
4514 
4515   /* set values of ba */
4516   m = merge->rowmap->n;
4517   for (i=0; i<m; i++) {
4518     arow = owners[rank] + i;
4519     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4520     bnzi = bi[i+1] - bi[i];
4521     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4522 
4523     /* add local non-zero vals of this proc's seqmat into ba */
4524     anzi = ai[arow+1] - ai[arow];
4525     aj   = a->j + ai[arow];
4526     aa   = a->a + ai[arow];
4527     nextaj = 0;
4528     for (j=0; nextaj<anzi; j++){
4529       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4530         ba_i[j] += aa[nextaj++];
4531       }
4532     }
4533 
4534     /* add received vals into ba */
4535     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4536       /* i-th row */
4537       if (i == *nextrow[k]) {
4538         anzi = *(nextai[k]+1) - *nextai[k];
4539         aj   = buf_rj[k] + *(nextai[k]);
4540         aa   = abuf_r[k] + *(nextai[k]);
4541         nextaj = 0;
4542         for (j=0; nextaj<anzi; j++){
4543           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4544             ba_i[j] += aa[nextaj++];
4545           }
4546         }
4547         nextrow[k]++; nextai[k]++;
4548       }
4549     }
4550     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4551   }
4552   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4553   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4554 
4555   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4556   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4557   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4558   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4559   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4564 
4565 #undef __FUNCT__
4566 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4567 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4568 {
4569   PetscErrorCode       ierr;
4570   Mat                  B_mpi;
4571   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4572   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4573   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4574   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4575   PetscInt             len,proc,*dnz,*onz;
4576   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4577   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4578   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4579   MPI_Status           *status;
4580   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4581   PetscBT              lnkbt;
4582   Mat_Merge_SeqsToMPI  *merge;
4583   PetscContainer       container;
4584 
4585   PetscFunctionBegin;
4586   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4587 
4588   /* make sure it is a PETSc comm */
4589   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4591   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4592 
4593   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4594   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4595 
4596   /* determine row ownership */
4597   /*---------------------------------------------------------*/
4598   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4599   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4600   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4601   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4602   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4603   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4604   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4605 
4606   m      = merge->rowmap->n;
4607   M      = merge->rowmap->N;
4608   owners = merge->rowmap->range;
4609 
4610   /* determine the number of messages to send, their lengths */
4611   /*---------------------------------------------------------*/
4612   len_s  = merge->len_s;
4613 
4614   len = 0;  /* length of buf_si[] */
4615   merge->nsend = 0;
4616   for (proc=0; proc<size; proc++){
4617     len_si[proc] = 0;
4618     if (proc == rank){
4619       len_s[proc] = 0;
4620     } else {
4621       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4622       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4623     }
4624     if (len_s[proc]) {
4625       merge->nsend++;
4626       nrows = 0;
4627       for (i=owners[proc]; i<owners[proc+1]; i++){
4628         if (ai[i+1] > ai[i]) nrows++;
4629       }
4630       len_si[proc] = 2*(nrows+1);
4631       len += len_si[proc];
4632     }
4633   }
4634 
4635   /* determine the number and length of messages to receive for ij-structure */
4636   /*-------------------------------------------------------------------------*/
4637   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4638   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4639 
4640   /* post the Irecv of j-structure */
4641   /*-------------------------------*/
4642   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4643   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4644 
4645   /* post the Isend of j-structure */
4646   /*--------------------------------*/
4647   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4648 
4649   for (proc=0, k=0; proc<size; proc++){
4650     if (!len_s[proc]) continue;
4651     i = owners[proc];
4652     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4653     k++;
4654   }
4655 
4656   /* receives and sends of j-structure are complete */
4657   /*------------------------------------------------*/
4658   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4659   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4660 
4661   /* send and recv i-structure */
4662   /*---------------------------*/
4663   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4664   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4665 
4666   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4667   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4668   for (proc=0,k=0; proc<size; proc++){
4669     if (!len_s[proc]) continue;
4670     /* form outgoing message for i-structure:
4671          buf_si[0]:                 nrows to be sent
4672                [1:nrows]:           row index (global)
4673                [nrows+1:2*nrows+1]: i-structure index
4674     */
4675     /*-------------------------------------------*/
4676     nrows = len_si[proc]/2 - 1;
4677     buf_si_i    = buf_si + nrows+1;
4678     buf_si[0]   = nrows;
4679     buf_si_i[0] = 0;
4680     nrows = 0;
4681     for (i=owners[proc]; i<owners[proc+1]; i++){
4682       anzi = ai[i+1] - ai[i];
4683       if (anzi) {
4684         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4685         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4686         nrows++;
4687       }
4688     }
4689     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4690     k++;
4691     buf_si += len_si[proc];
4692   }
4693 
4694   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4695   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4696 
4697   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4698   for (i=0; i<merge->nrecv; i++){
4699     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4700   }
4701 
4702   ierr = PetscFree(len_si);CHKERRQ(ierr);
4703   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4704   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4705   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4706   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4707   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4708   ierr = PetscFree(status);CHKERRQ(ierr);
4709 
4710   /* compute a local seq matrix in each processor */
4711   /*----------------------------------------------*/
4712   /* allocate bi array and free space for accumulating nonzero column info */
4713   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4714   bi[0] = 0;
4715 
4716   /* create and initialize a linked list */
4717   nlnk = N+1;
4718   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4719 
4720   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4721   len = 0;
4722   len  = ai[owners[rank+1]] - ai[owners[rank]];
4723   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4724   current_space = free_space;
4725 
4726   /* determine symbolic info for each local row */
4727   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4728 
4729   for (k=0; k<merge->nrecv; k++){
4730     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4731     nrows = *buf_ri_k[k];
4732     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4733     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4734   }
4735 
4736   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4737   len = 0;
4738   for (i=0;i<m;i++) {
4739     bnzi   = 0;
4740     /* add local non-zero cols of this proc's seqmat into lnk */
4741     arow   = owners[rank] + i;
4742     anzi   = ai[arow+1] - ai[arow];
4743     aj     = a->j + ai[arow];
4744     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4745     bnzi += nlnk;
4746     /* add received col data into lnk */
4747     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4748       if (i == *nextrow[k]) { /* i-th row */
4749         anzi = *(nextai[k]+1) - *nextai[k];
4750         aj   = buf_rj[k] + *nextai[k];
4751         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4752         bnzi += nlnk;
4753         nextrow[k]++; nextai[k]++;
4754       }
4755     }
4756     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4757 
4758     /* if free space is not available, make more free space */
4759     if (current_space->local_remaining<bnzi) {
4760       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4761       nspacedouble++;
4762     }
4763     /* copy data into free space, then initialize lnk */
4764     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4765     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4766 
4767     current_space->array           += bnzi;
4768     current_space->local_used      += bnzi;
4769     current_space->local_remaining -= bnzi;
4770 
4771     bi[i+1] = bi[i] + bnzi;
4772   }
4773 
4774   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4775 
4776   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4777   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4778   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4779 
4780   /* create symbolic parallel matrix B_mpi */
4781   /*---------------------------------------*/
4782   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4783   if (n==PETSC_DECIDE) {
4784     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4785   } else {
4786     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4787   }
4788   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4789   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4790   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4791 
4792   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4793   B_mpi->assembled     = PETSC_FALSE;
4794   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4795   merge->bi            = bi;
4796   merge->bj            = bj;
4797   merge->buf_ri        = buf_ri;
4798   merge->buf_rj        = buf_rj;
4799   merge->coi           = PETSC_NULL;
4800   merge->coj           = PETSC_NULL;
4801   merge->owners_co     = PETSC_NULL;
4802 
4803   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4804 
4805   /* attach the supporting struct to B_mpi for reuse */
4806   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4807   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4808   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4809   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4810   *mpimat = B_mpi;
4811 
4812   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4813   PetscFunctionReturn(0);
4814 }
4815 
4816 #undef __FUNCT__
4817 #define __FUNCT__ "MatMerge_SeqsToMPI"
4818 /*@C
4819       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4820                  matrices from each processor
4821 
4822     Collective on MPI_Comm
4823 
4824    Input Parameters:
4825 +    comm - the communicator the parallel matrix will live on
4826 .    seqmat - the input sequential matrices
4827 .    m - number of local rows (or PETSC_DECIDE)
4828 .    n - number of local columns (or PETSC_DECIDE)
4829 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4830 
4831    Output Parameter:
4832 .    mpimat - the parallel matrix generated
4833 
4834     Level: advanced
4835 
4836    Notes:
4837      The dimensions of the sequential matrix in each processor MUST be the same.
4838      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4839      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
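
     Example usage (a sketch; seqmat names a SeqAIJ matrix of identical dimensions assembled on every process):
.vb
     Mat C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve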
4840 @*/
4841 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4842 {
4843   PetscErrorCode   ierr;
4844 
4845   PetscFunctionBegin;
4846   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4847   if (scall == MAT_INITIAL_MATRIX){
4848     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4849   }
4850   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4851   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4852   PetscFunctionReturn(0);
4853 }
4854 
4855 #undef __FUNCT__
4856 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4857 /*@
4858      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4859           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4860           with MatGetSize()
4861 
4862     Not Collective
4863 
4864    Input Parameters:
4865 +    A - the matrix
4866 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4867 
4868    Output Parameter:
4869 .    A_loc - the local sequential matrix generated
4870 
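    Example usage (a sketch; the variable name Aloc is illustrative):
.vb
    Mat Aloc;
    ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
    /* ... use the mlocal x N sequential matrix Aloc ... */
    ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
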
4871     Level: developer
4872 
4873 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4874 
4875 @*/
4876 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4877 {
4878   PetscErrorCode  ierr;
4879   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4880   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4881   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4882   MatScalar       *aa=a->a,*ba=b->a,*cam;
4883   PetscScalar     *ca;
4884   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4885   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4886   PetscBool       match;
4887 
4888   PetscFunctionBegin;
4889   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4890   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4891   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4892   if (scall == MAT_INITIAL_MATRIX){
4893     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4894     ci[0] = 0;
4895     for (i=0; i<am; i++){
4896       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4897     }
4898     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4899     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4900     k = 0;
4901     for (i=0; i<am; i++) {
4902       ncols_o = bi[i+1] - bi[i];
4903       ncols_d = ai[i+1] - ai[i];
4904       /* off-diagonal portion of A */
4905       for (jo=0; jo<ncols_o; jo++) {
4906         col = cmap[*bj];
4907         if (col >= cstart) break;
4908         cj[k]   = col; bj++;
4909         ca[k++] = *ba++;
4910       }
4911       /* diagonal portion of A */
4912       for (j=0; j<ncols_d; j++) {
4913         cj[k]   = cstart + *aj++;
4914         ca[k++] = *aa++;
4915       }
4916       /* off-diagonal portion of A */
4917       for (j=jo; j<ncols_o; j++) {
4918         cj[k]   = cmap[*bj++];
4919         ca[k++] = *ba++;
4920       }
4921     }
4922     /* put together the new matrix */
4923     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4924     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4925     /* Since these are PETSc arrays, change flags to free them as necessary. */
4926     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4927     mat->free_a  = PETSC_TRUE;
4928     mat->free_ij = PETSC_TRUE;
4929     mat->nonew   = 0;
4930   } else if (scall == MAT_REUSE_MATRIX){
4931     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4932     ci = mat->i; cj = mat->j; cam = mat->a;
4933     for (i=0; i<am; i++) {
4934       /* off-diagonal portion of A */
4935       ncols_o = bi[i+1] - bi[i];
4936       for (jo=0; jo<ncols_o; jo++) {
4937         col = cmap[*bj];
4938         if (col >= cstart) break;
4939         *cam++ = *ba++; bj++;
4940       }
4941       /* diagonal portion of A */
4942       ncols_d = ai[i+1] - ai[i];
4943       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4944       /* off-diagonal portion of A */
4945       for (j=jo; j<ncols_o; j++) {
4946         *cam++ = *ba++; bj++;
4947       }
4948     }
4949   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4950   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 #undef __FUNCT__
4955 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4956 /*@C
4957      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4958 
4959     Not Collective
4960 
4961    Input Parameters:
4962 +    A - the matrix
4963 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4964 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4965 
4966    Output Parameter:
4967 .    A_loc - the local sequential matrix generated
4968 
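    Example usage (a sketch; passing PETSC_NULL for row and col selects all local rows
    and all nonzero columns):
.vb
    Mat Aloc;
    ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve
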
4969     Level: developer
4970 
4971 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4972 
4973 @*/
4974 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4975 {
4976   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4977   PetscErrorCode    ierr;
4978   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4979   IS                isrowa,iscola;
4980   Mat               *aloc;
4981   PetscBool       match;
4982 
4983   PetscFunctionBegin;
4984   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4985   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4986   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4987   if (!row){
4988     start = A->rmap->rstart; end = A->rmap->rend;
4989     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4990   } else {
4991     isrowa = *row;
4992   }
4993   if (!col){
4994     start = A->cmap->rstart;
4995     cmap  = a->garray;
4996     nzA   = a->A->cmap->n;
4997     nzB   = a->B->cmap->n;
4998     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4999     ncols = 0;
5000     for (i=0; i<nzB; i++) {
5001       if (cmap[i] < start) idx[ncols++] = cmap[i];
5002       else break;
5003     }
5004     imark = i;
5005     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5006     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5007     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5008   } else {
5009     iscola = *col;
5010   }
5011   if (scall != MAT_INITIAL_MATRIX){
5012     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5013     aloc[0] = *A_loc;
5014   }
5015   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5016   *A_loc = aloc[0];
5017   ierr = PetscFree(aloc);CHKERRQ(ierr);
5018   if (!row){
5019     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5020   }
5021   if (!col){
5022     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5023   }
5024   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 #undef __FUNCT__
5029 #define __FUNCT__ "MatGetBrowsOfAcols"
5030 /*@C
5031     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5032 
5033     Collective on Mat
5034 
5035    Input Parameters:
5036 +    A,B - the matrices in mpiaij format
5037 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5038 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5039 
5040    Output Parameter:
5041 +    rowb, colb - index sets of rows and columns of B to extract
5042 -    B_seq - the sequential matrix generated
5043 
5044     Level: developer
5045 
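   Example usage (a sketch; with MAT_INITIAL_MATRIX the routine creates the row and
   column index sets and returns them for later reuse):
.vb
   IS  rowb,colb;
   Mat Bseq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
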
5046 @*/
5047 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5048 {
5049   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5050   PetscErrorCode    ierr;
5051   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5052   IS                isrowb,iscolb;
5053   Mat               *bseq=PETSC_NULL;
5054 
5055   PetscFunctionBegin;
5056   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5057     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5058   }
5059   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5060 
5061   if (scall == MAT_INITIAL_MATRIX){
5062     start = A->cmap->rstart;
5063     cmap  = a->garray;
5064     nzA   = a->A->cmap->n;
5065     nzB   = a->B->cmap->n;
5066     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5067     ncols = 0;
5068     for (i=0; i<nzB; i++) {  /* row < local row index */
5069       if (cmap[i] < start) idx[ncols++] = cmap[i];
5070       else break;
5071     }
5072     imark = i;
5073     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5074     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5075     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5076     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5077   } else {
5078     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5079     isrowb = *rowb; iscolb = *colb;
5080     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5081     bseq[0] = *B_seq;
5082   }
5083   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5084   *B_seq = bseq[0];
5085   ierr = PetscFree(bseq);CHKERRQ(ierr);
5086   if (!rowb){
5087     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5088   } else {
5089     *rowb = isrowb;
5090   }
5091   if (!colb){
5092     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5093   } else {
5094     *colb = iscolb;
5095   }
5096   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 #undef __FUNCT__
5101 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5102 /*
5103     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5104     of the OFF-DIAGONAL portion of local A
5105 
5106     Collective on Mat
5107 
5108    Input Parameters:
5109 +    A,B - the matrices in mpiaij format
5110 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5111 
5112    Output Parameter:
5113 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5114 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5115 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5116 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5117 
5118     Level: developer
5119 
5120 */
5121 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5122 {
5123   VecScatter_MPI_General *gen_to,*gen_from;
5124   PetscErrorCode         ierr;
5125   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5126   Mat_SeqAIJ             *b_oth;
5127   VecScatter             ctx=a->Mvctx;
5128   MPI_Comm               comm=((PetscObject)ctx)->comm;
5129   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5130   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5131   PetscScalar            *rvalues,*svalues;
5132   MatScalar              *b_otha,*bufa,*bufA;
5133   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5134   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5135   MPI_Status             *sstatus,rstatus;
5136   PetscMPIInt            jj;
5137   PetscInt               *cols,sbs,rbs;
5138   PetscScalar            *vals;
5139 
5140   PetscFunctionBegin;
5141   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5142     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5143   }
5144   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5145   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5146 
5147   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5148   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5149   rvalues  = gen_from->values; /* holds the length of receiving row */
5150   svalues  = gen_to->values;   /* holds the length of sending row */
5151   nrecvs   = gen_from->n;
5152   nsends   = gen_to->n;
5153 
5154   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5155   srow     = gen_to->indices;   /* local row index to be sent */
5156   sstarts  = gen_to->starts;
5157   sprocs   = gen_to->procs;
5158   sstatus  = gen_to->sstatus;
5159   sbs      = gen_to->bs;
5160   rstarts  = gen_from->starts;
5161   rprocs   = gen_from->procs;
5162   rbs      = gen_from->bs;
5163 
5164   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5165   if (scall == MAT_INITIAL_MATRIX){
5166     /* i-array */
5167     /*---------*/
5168     /*  post receives */
5169     for (i=0; i<nrecvs; i++){
5170       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5171       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5172       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5173     }
5174 
5175     /* pack the outgoing message */
5176     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5177     sstartsj[0] = 0;  rstartsj[0] = 0;
5178     len = 0; /* total length of j or a array to be sent */
5179     k = 0;
5180     for (i=0; i<nsends; i++){
5181       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5182       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5183       for (j=0; j<nrows; j++) {
5184         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5185         for (l=0; l<sbs; l++){
5186           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5187           rowlen[j*sbs+l] = ncols;
5188           len += ncols;
5189           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5190         }
5191         k++;
5192       }
5193       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5194       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5195     }
5196     /* recvs and sends of i-array are completed */
5197     i = nrecvs;
5198     while (i--) {
5199       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5200     }
5201     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5202 
5203     /* allocate buffers for sending j and a arrays */
5204     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5205     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5206 
5207     /* create i-array of B_oth */
5208     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5209     b_othi[0] = 0;
5210     len = 0; /* total length of j or a array to be received */
5211     k = 0;
5212     for (i=0; i<nrecvs; i++){
5213       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5214       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5215       for (j=0; j<nrows; j++) {
5216         b_othi[k+1] = b_othi[k] + rowlen[j];
5217         len += rowlen[j]; k++;
5218       }
5219       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5220     }
5221 
5222     /* allocate space for j and a arrays of B_oth */
5223     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5224     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5225 
5226     /* j-array */
5227     /*---------*/
5228     /*  post receives of j-array */
5229     for (i=0; i<nrecvs; i++){
5230       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5231       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5232     }
5233 
5234     /* pack the outgoing message j-array */
5235     k = 0;
5236     for (i=0; i<nsends; i++){
5237       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5238       bufJ = bufj+sstartsj[i];
5239       for (j=0; j<nrows; j++) {
5240         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5241         for (ll=0; ll<sbs; ll++){
5242           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5243           for (l=0; l<ncols; l++){
5244             *bufJ++ = cols[l];
5245           }
5246           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5247         }
5248       }
5249       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5250     }
5251 
5252     /* recvs and sends of j-array are completed */
5253     i = nrecvs;
5254     while (i--) {
5255       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5256     }
5257     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5258   } else if (scall == MAT_REUSE_MATRIX){
5259     sstartsj = *startsj_s;
5260     rstartsj = *startsj_r;
5261     bufa     = *bufa_ptr;
5262     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5263     b_otha   = b_oth->a;
5264   } else {
5265     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5266   }
5267 
5268   /* a-array */
5269   /*---------*/
5270   /*  post receives of a-array */
5271   for (i=0; i<nrecvs; i++){
5272     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5273     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5274   }
5275 
5276   /* pack the outgoing message a-array */
5277   k = 0;
5278   for (i=0; i<nsends; i++){
5279     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5280     bufA = bufa+sstartsj[i];
5281     for (j=0; j<nrows; j++) {
5282       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5283       for (ll=0; ll<sbs; ll++){
5284         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5285         for (l=0; l<ncols; l++){
5286           *bufA++ = vals[l];
5287         }
5288         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5289       }
5290     }
5291     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5292   }
5293   /* recvs and sends of a-array are completed */
5294   i = nrecvs;
5295   while (i--) {
5296     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5297   }
5298   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5299   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5300 
5301   if (scall == MAT_INITIAL_MATRIX){
5302     /* put together the new matrix */
5303     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5304 
5305     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5306     /* Since these are PETSc arrays, change flags to free them as necessary. */
5307     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5308     b_oth->free_a  = PETSC_TRUE;
5309     b_oth->free_ij = PETSC_TRUE;
5310     b_oth->nonew   = 0;
5311 
5312     ierr = PetscFree(bufj);CHKERRQ(ierr);
5313     if (!startsj_s || !bufa_ptr){
5314       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5315       ierr = PetscFree(bufa);CHKERRQ(ierr);
5316     } else {
5317       *startsj_s = sstartsj;
5318       *startsj_r = rstartsj;
5319       *bufa_ptr  = bufa;
5320     }
5321   }
5322   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5323   PetscFunctionReturn(0);
5324 }
5325 
5326 #undef __FUNCT__
5327 #define __FUNCT__ "MatGetCommunicationStructs"
5328 /*@C
5329   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5330 
5331   Not Collective
5332 
5333   Input Parameter:
5334 . A - The matrix, which must be of type MATMPIAIJ
5335 
5336   Output Parameters:
5337 + lvec - The local vector holding the off-process values of the input vector in a matrix-vector product
5338 . colmap - A map from global column index to local index into lvec
5339 - multScatter - A scatter from the input vector of a matrix-vector product to lvec
5340 
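  Example usage (a minimal sketch, not a tested fragment; it assumes A is an assembled
  MPIAIJ matrix and that PETSc was configured without PETSC_USE_CTABLE, so colmap is a
  plain PetscInt array):
.vb
  Vec        lvec;
  PetscInt   *colmap;
  VecScatter Mvctx;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
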
5341   Level: developer
5342 
5343 @*/
5344 #if defined (PETSC_USE_CTABLE)
5345 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5346 #else
5347 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5348 #endif
5349 {
5350   Mat_MPIAIJ *a;
5351 
5352   PetscFunctionBegin;
5353   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5354   PetscValidPointer(lvec, 2);
5355   PetscValidPointer(colmap, 3);
5356   PetscValidPointer(multScatter, 4);
5357   a = (Mat_MPIAIJ *) A->data;
5358   if (lvec) *lvec = a->lvec;
5359   if (colmap) *colmap = a->colmap;
5360   if (multScatter) *multScatter = a->Mvctx;
5361   PetscFunctionReturn(0);
5362 }
5363 
5364 EXTERN_C_BEGIN
5365 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5366 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5367 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5368 EXTERN_C_END
5369 
5370 #undef __FUNCT__
5371 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5372 /*
5373     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5374 
5375                n                       p                          p
5376         (              )       (              )         (                  )
5377       m (      A       )  *  n (       B      )   =   m (         C        )
5378         (              )       (              )         (                  )
5379 
5380 */
5381 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5382 {
5383   PetscErrorCode     ierr;
5384   Mat                At,Bt,Ct;
5385 
5386   PetscFunctionBegin;
5387   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5388   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5389   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5390   ierr = MatDestroy(&At);CHKERRQ(ierr);
5391   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5392   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5393   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5394   PetscFunctionReturn(0);
5395 }
5396 
5397 #undef __FUNCT__
5398 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5399 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5400 {
5401   PetscErrorCode ierr;
5402   PetscInt       m=A->rmap->n,n=B->cmap->n;
5403   Mat            Cmat;
5404 
5405   PetscFunctionBegin;
5406   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5407   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5408   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5409   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5410   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5411   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5412   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5413   *C   = Cmat;
5414   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5415   PetscFunctionReturn(0);
5416 }
5417 
5418 /* ----------------------------------------------------------------*/
5419 #undef __FUNCT__
5420 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5421 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5422 {
5423   PetscErrorCode ierr;
5424 
5425   PetscFunctionBegin;
5426   if (scall == MAT_INITIAL_MATRIX){
5427     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5428   }
5429   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5430   PetscFunctionReturn(0);
5431 }
5432 
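/*
   Usage sketch (illustrative only; A_dense and B_aij are assumed to be assembled
   MATMPIDENSE and MATMPIAIJ matrices of compatible sizes): the routines above are not
   called directly but are reached through the generic interface, e.g.

      ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   The symbolic phase above creates the product C as a MATMPIDENSE matrix.
*/
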
5433 EXTERN_C_BEGIN
5434 #if defined(PETSC_HAVE_MUMPS)
5435 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5436 #endif
5437 #if defined(PETSC_HAVE_PASTIX)
5438 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5439 #endif
5440 #if defined(PETSC_HAVE_SUPERLU_DIST)
5441 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5442 #endif
5443 #if defined(PETSC_HAVE_SPOOLES)
5444 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5445 #endif
5446 EXTERN_C_END
5447 
5448 /*MC
5449    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5450 
5451    Options Database Keys:
5452 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5453 
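  Example usage (a minimal sketch; the global sizes M, N and the preallocation counts
  are placeholders chosen for illustration):
.vb
  Mat A;
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
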
5454   Level: beginner
5455 
5456 .seealso: MatCreateMPIAIJ()
5457 M*/
5458 
5459 EXTERN_C_BEGIN
5460 #undef __FUNCT__
5461 #define __FUNCT__ "MatCreate_MPIAIJ"
5462 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5463 {
5464   Mat_MPIAIJ     *b;
5465   PetscErrorCode ierr;
5466   PetscMPIInt    size;
5467 
5468   PetscFunctionBegin;
5469   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5470 
5471   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5472   B->data         = (void*)b;
5473   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5474   B->rmap->bs     = 1;
5475   B->assembled    = PETSC_FALSE;
5476 
5477   B->insertmode   = NOT_SET_VALUES;
5478   b->size         = size;
5479   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5480 
5481   /* build cache for off array entries formed */
5482   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5483   b->donotstash  = PETSC_FALSE;
5484   b->colmap      = 0;
5485   b->garray      = 0;
5486   b->roworiented = PETSC_TRUE;
5487 
5488   /* stuff used for matrix vector multiply */
5489   b->lvec      = PETSC_NULL;
5490   b->Mvctx     = PETSC_NULL;
5491 
5492   /* stuff for MatGetRow() */
5493   b->rowindices   = 0;
5494   b->rowvalues    = 0;
5495   b->getrowactive = PETSC_FALSE;
5496 
5497 #if defined(PETSC_HAVE_SPOOLES)
5498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5499                                      "MatGetFactor_mpiaij_spooles",
5500                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5501 #endif
5502 #if defined(PETSC_HAVE_MUMPS)
5503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5504                                      "MatGetFactor_aij_mumps",
5505                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5506 #endif
5507 #if defined(PETSC_HAVE_PASTIX)
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5509 					   "MatGetFactor_mpiaij_pastix",
5510 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5511 #endif
5512 #if defined(PETSC_HAVE_SUPERLU_DIST)
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5514                                      "MatGetFactor_mpiaij_superlu_dist",
5515                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5516 #endif
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5518                                      "MatStoreValues_MPIAIJ",
5519                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5521                                      "MatRetrieveValues_MPIAIJ",
5522                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5524 				     "MatGetDiagonalBlock_MPIAIJ",
5525                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5527 				     "MatIsTranspose_MPIAIJ",
5528 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5530 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5531 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5533 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5534 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5536 				     "MatDiagonalScaleLocal_MPIAIJ",
5537 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5539                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5540                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5542                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5543                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5545                                      "MatConvert_MPIAIJ_MPISBAIJ",
5546                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5548                                      "MatMatMult_MPIDense_MPIAIJ",
5549                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5550   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5551                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5552                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5554                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5555                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5557   PetscFunctionReturn(0);
5558 }
5559 EXTERN_C_END
5560 
5561 #undef __FUNCT__
5562 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5563 /*@
5564      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5565          and "off-diagonal" parts of the matrix in CSR format.
5566 
5567    Collective on MPI_Comm
5568 
5569    Input Parameters:
5570 +  comm - MPI communicator
5571 .  m - number of local rows (cannot be PETSC_DECIDE)
5572 .  n - number of local columns; this value should be the same as the local size used in creating the
5573        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5574        calculated if N is given). For square matrices n is almost always m.
5575 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5576 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5577 .   i - row indices for the "diagonal" portion of the matrix
5578 .   j - column indices for the "diagonal" portion of the matrix
5579 .   a - matrix values for the "diagonal" portion of the matrix
5580 .   oi - row indices for the "off-diagonal" portion of the matrix
5581 .   oj - column indices for the "off-diagonal" portion of the matrix
5582 -   oa - matrix values for the "off-diagonal" portion of the matrix
5583 
5584    Output Parameter:
5585 .   mat - the matrix
5586 
5587    Level: advanced
5588 
5589    Notes:
5590        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5591        must not free the arrays until the matrix has been destroyed.
5592 
5593        The i, j, oi, and oj indices are 0 based
5594 
5595        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5596 
5597        This sets local rows and cannot be used to set off-processor values.
5598 
5599        You cannot later use MatSetValues() to change values in this matrix.
5600 
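   Example usage (a minimal sketch for the process owning global rows 0 and 1 of a 4x4
   matrix distributed over two processes; the other process builds its own arrays in the
   same way). In this sketch the "diagonal" column indices j are local to the diagonal
   block while the "off-diagonal" column indices oj are global, matching how the arrays
   are handed to MatCreateSeqAIJWithArrays() inside this routine; in real code the arrays
   must remain valid until the matrix is destroyed:
.vb
   Mat         A;
   PetscInt    i[]  = {0,1,2}, j[]  = {0,1}; PetscScalar a[]  = {1.0,3.0};
   PetscInt    oi[] = {0,1,1}, oj[] = {2};   PetscScalar oa[] = {2.0};

   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
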
5601 .keywords: matrix, aij, compressed row, sparse, parallel
5602 
5603 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5604           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5605 @*/
5606 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5607 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5608 {
5609   PetscErrorCode ierr;
5610   Mat_MPIAIJ     *maij;
5611 
5612   PetscFunctionBegin;
5613   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5614   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5615   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5616   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5617   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5618   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5619   maij = (Mat_MPIAIJ*) (*mat)->data;
5620   maij->donotstash     = PETSC_TRUE;
5621   (*mat)->preallocated = PETSC_TRUE;
5622 
5623   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5624   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5625   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5626   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5627 
5628   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5629   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5630 
5631   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5632   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5633   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5634   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5635 
5636   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5637   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5638   PetscFunctionReturn(0);
5639 }
5640 
5641 /*
5642     Special version for direct calls from Fortran
5643 */
5644 #include <private/fortranimpl.h>
5645 
5646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5647 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5649 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5650 #endif
5651 
5652 /* Redefine these macros so they can be used in a void function */
5653 #undef CHKERRQ
5654 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5655 #undef SETERRQ2
5656 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5657 #undef SETERRQ3
5658 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5659 #undef SETERRQ
5660 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5661 
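/*
   matsetvaluesmpiaij_() below is a special version of MatSetValues() for direct calls
   from Fortran.  It returns void rather than a PetscErrorCode, so the error macros above
   are redefined to abort via CHKERRABORT() instead of returning an error code.
*/
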
5662 EXTERN_C_BEGIN
5663 #undef __FUNCT__
5664 #define __FUNCT__ "matsetvaluesmpiaij_"
5665 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5666 {
5667   Mat             mat = *mmat;
5668   PetscInt        m = *mm, n = *mn;
5669   InsertMode      addv = *maddv;
5670   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5671   PetscScalar     value;
5672   PetscErrorCode  ierr;
5673 
5674   MatCheckPreallocated(mat,1);
5675   if (mat->insertmode == NOT_SET_VALUES) {
5676     mat->insertmode = addv;
5677   }
5678 #if defined(PETSC_USE_DEBUG)
5679   else if (mat->insertmode != addv) {
5680     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5681   }
5682 #endif
5683   {
5684   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5685   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5686   PetscBool       roworiented = aij->roworiented;
5687 
5688   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5689   Mat             A = aij->A;
5690   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5691   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5692   MatScalar       *aa = a->a;
5693   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5694   Mat             B = aij->B;
5695   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5696   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5697   MatScalar       *ba = b->a;
5698 
5699   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5700   PetscInt        nonew = a->nonew;
5701   MatScalar       *ap1,*ap2;
5702 
5703   PetscFunctionBegin;
5704   for (i=0; i<m; i++) {
5705     if (im[i] < 0) continue;
5706 #if defined(PETSC_USE_DEBUG)
5707     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5708 #endif
5709     if (im[i] >= rstart && im[i] < rend) {
5710       row      = im[i] - rstart;
5711       lastcol1 = -1;
5712       rp1      = aj + ai[row];
5713       ap1      = aa + ai[row];
5714       rmax1    = aimax[row];
5715       nrow1    = ailen[row];
5716       low1     = 0;
5717       high1    = nrow1;
5718       lastcol2 = -1;
5719       rp2      = bj + bi[row];
5720       ap2      = ba + bi[row];
5721       rmax2    = bimax[row];
5722       nrow2    = bilen[row];
5723       low2     = 0;
5724       high2    = nrow2;
5725 
5726       for (j=0; j<n; j++) {
5727         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5728         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5729         if (in[j] >= cstart && in[j] < cend){
5730           col = in[j] - cstart;
5731           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5732         } else if (in[j] < 0) continue;
5733 #if defined(PETSC_USE_DEBUG)
5734         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5735 #endif
5736         else {
5737           if (mat->was_assembled) {
5738             if (!aij->colmap) {
5739               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5740             }
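            /* colmap maps a global column index to (local off-diagonal column index + 1),
               so a result of 0 means the column is not yet present in the off-diagonal part;
               subtract 1 below to recover the local index */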
5741 #if defined (PETSC_USE_CTABLE)
5742             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5743 	    col--;
5744 #else
5745             col = aij->colmap[in[j]] - 1;
5746 #endif
5747             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5748               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5749               col =  in[j];
5750               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5751               B = aij->B;
5752               b = (Mat_SeqAIJ*)B->data;
5753               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5754               rp2      = bj + bi[row];
5755               ap2      = ba + bi[row];
5756               rmax2    = bimax[row];
5757               nrow2    = bilen[row];
5758               low2     = 0;
5759               high2    = nrow2;
5760               bm       = aij->B->rmap->n;
5761               ba = b->a;
5762             }
5763           } else col = in[j];
5764           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5765         }
5766       }
5767     } else {
5768       if (!aij->donotstash) {
5769         if (roworiented) {
5770           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5771         } else {
5772           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5773         }
5774       }
5775     }
5776   }}
5777   PetscFunctionReturnVoid();
5778 }
5779 EXTERN_C_END
5780 
5781