xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3287b5ea4e22fe4244e8834c902767a4f15b33b1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
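/*
   Example (an illustrative sketch, not part of this file): the pattern recommended in the
   MATAIJ notes above is to call both preallocation routines; whichever one does not match the
   communicator size is simply ignored.  The per-row nonzero estimates (5 and 2) and the names
   comm, A, N are placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);            /* or -mat_type aij with MatSetFromOptions() */
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/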
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
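/*
   Example (illustrative sketch): the CRL variant is normally selected at runtime, e.g.
   "./app -mat_type aijcrl", provided the application calls

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   after MatSetSizes() and before preallocation and assembly.
*/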
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all other processes */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0 */
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
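/*
   Calling sketch (illustrative only): a square MATSEQAIJ matrix gmat assembled on rank 0 is
   spread over the communicator; each rank passes its desired local row count.  Based on the
   code above, gmat is only dereferenced on rank 0, and a second call with MAT_REUSE_MATRIX
   just refreshes the numerical values.  The names comm, gmat, mlocal are placeholders.

     Mat A;
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/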
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each process
330 has an order-N integer array) but access is fast.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
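/*
   Lookup sketch (illustrative): colmap stores local_column+1 so that 0 can mean "column not
   present in the off-diagonal block".  A global column gcol is translated to a local column
   lcol of B the same way MatSetValues_MPIAIJ() does below (gcol and lcol are placeholder names):

#if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif

   A resulting lcol of -1 means the column does not occur in this process's off-diagonal part.
*/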
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some variables required by the macros */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES, not a mixture */
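  /* (a mixture is detected because the bitwise OR of different modes on different ranks
     yields ADD_VALUES|INSERT_VALUES, which is not a valid single insert mode) */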
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values, others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled; if so, we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal, we use separate
837      code so that MatSetValues() is not called for each diagonal entry,
838      which would allocate new memory (many mallocs) and slow things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually */
1136     /* added in yy until the next line */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* values actually were received in the Begin() but we need to call this nop */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: symmetric diagonal block */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
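  /*
     The column indices for each local row are packed in increasing global order:
     first the off-diagonal (B) entries whose columns lie below the diagonal block
     (mapped through garray), then the diagonal-block (A) entries shifted by
     cstart, then the remaining off-diagonal entries.  This relies on the columns
     within each block being stored in increasing order.
  */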
1344   /* load up the local column indices */
1345   nzmax = nz; /* the 0th process needs as much space as the largest process requires */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
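/*
   Handles ASCII, binary, draw, and socket viewers.  The ASCII "info" formats only
   print per-process nonzero statistics; otherwise, on more than one process, the
   entire matrix is assembled onto process 0 (guarded by a 1024-row limit for
   plain ASCII output) and then viewed through the singleton viewer.
*/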
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1521     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1522 
1523     /* copy over the A part */
1524     Aloc = (Mat_SeqAIJ*)aij->A->data;
1525     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1526     row = mat->rmap->rstart;
1527     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1528     for (i=0; i<m; i++) {
1529       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1530       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1531     }
1532     aj = Aloc->j;
1533     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1534 
1535     /* copy over the B part */
1536     Aloc = (Mat_SeqAIJ*)aij->B->data;
1537     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row  = mat->rmap->rstart;
1539     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1540     ct   = cols;
1541     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1542     for (i=0; i<m; i++) {
1543       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1544       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1545     }
1546     ierr = PetscFree(ct);CHKERRQ(ierr);
1547     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1549     /*
1550        Everyone has to call to draw the matrix since the graphics waits are
1551        synchronized across all processors that share the PetscDraw object
1552     */
1553     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1554     if (!rank) {
1555       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1556       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1557       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1558       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1559     }
1560     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(&A);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatView_MPIAIJ"
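/*
   Dispatches to MatView_MPIAIJ_ASCIIorDraworSocket() for the supported viewer
   types.  A typical way to reach the binary path from user code is the following
   sketch (the file name is arbitrary):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/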
1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1569 {
1570   PetscErrorCode ierr;
1571   PetscBool      iascii,isdraw,issocket,isbinary;
1572 
1573   PetscFunctionBegin;
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1575   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1577   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1578   if (iascii || isdraw || isbinary || issocket) {
1579     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1580   } else {
1581     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSOR_MPIAIJ"
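/*
   SOR for the parallel AIJ format.  Only the local (processor-block) variants and
   SOR_APPLY_UPPER are supported.  Each outer iteration scatters the current
   solution into the ghost vector lvec, forms bb1 = bb - B*lvec (the right-hand
   side with the off-process contribution removed), and runs the requested sweep
   of the sequential SOR kernel on the diagonal block A.  A true parallel SOR over
   the global ordering is not supported.
*/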
1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1589 {
1590   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1591   PetscErrorCode ierr;
1592   Vec            bb1 = 0;
1593   PetscBool      hasop;
1594 
1595   PetscFunctionBegin;
1596   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1597     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1598   }
1599 
1600   if (flag == SOR_APPLY_UPPER) {
1601     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602     PetscFunctionReturn(0);
1603   }
1604 
1605   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1606     if (flag & SOR_ZERO_INITIAL_GUESS) {
1607       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1608       its--;
1609     }
1610 
1611     while (its--) {
1612       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614 
1615       /* update rhs: bb1 = bb - B*x */
1616       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1617       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1618 
1619       /* local sweep */
1620       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1621     }
1622   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1623     if (flag & SOR_ZERO_INITIAL_GUESS) {
1624       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625       its--;
1626     }
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   }  else if (flag & SOR_EISENSTAT) {
1655     Vec         xx1;
1656 
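    /*
       Eisenstat's trick, local variant: a backward local sweep produces xx, the
       off-process contribution B*lvec and the diagonal scaling of xx are folded
       into a modified right-hand side bb1, and a forward local sweep on bb1 gives
       the correction xx1 that is added to xx.
    */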
1657     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1658     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1659 
1660     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662     if (!mat->diag) {
1663       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1664       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1665     }
1666     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1667     if (hasop) {
1668       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1669     } else {
1670       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1671     }
1672     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1673 
1674     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1675 
1676     /* local sweep */
1677     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1678     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1679     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1680   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1681 
1682   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatPermute_MPIAIJ"
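/*
   Permutes rows and columns by making a collective version of the row
   permutation, gathering and inverting it globally, extracting the locally owned
   target rows, inverting the column permutation locally, and then extracting the
   permuted matrix with MatGetSubMatrix_MPIAIJ_Private().
*/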
1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1689 {
1690   MPI_Comm       comm;
1691   PetscInt       first,local_rowsize,local_colsize;
1692   const PetscInt *rows;
1693   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1699   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1700   /* collect the global row permutation and invert it */
1701   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1702   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1703   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1704   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1705   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1706   /* get the local target indices */
1707   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1708   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1709   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1710   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1711   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1712   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1713   /* the column permutation is so much easier;
1714      make a local version of 'colp' and invert it */
1715   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1716   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1717   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1718   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1719   /* now we just get the submatrix */
1720   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1721   /* clean up */
1722   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatGetInfo_MPIAIJ"
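/*
   Accumulates the MatInfo statistics of the diagonal (A) and off-diagonal (B)
   blocks; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the five counters are reduced
   across the matrix's communicator.
*/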
1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1730 {
1731   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1732   Mat            A = mat->A,B = mat->B;
1733   PetscErrorCode ierr;
1734   PetscReal      isend[5],irecv[5];
1735 
1736   PetscFunctionBegin;
1737   info->block_size     = 1.0;
1738   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1739   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1740   isend[3] = info->memory;  isend[4] = info->mallocs;
1741   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1742   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1743   isend[3] += info->memory;  isend[4] += info->mallocs;
1744   if (flag == MAT_LOCAL) {
1745     info->nz_used      = isend[0];
1746     info->nz_allocated = isend[1];
1747     info->nz_unneeded  = isend[2];
1748     info->memory       = isend[3];
1749     info->mallocs      = isend[4];
1750   } else if (flag == MAT_GLOBAL_MAX) {
1751     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1752     info->nz_used      = irecv[0];
1753     info->nz_allocated = irecv[1];
1754     info->nz_unneeded  = irecv[2];
1755     info->memory       = irecv[3];
1756     info->mallocs      = irecv[4];
1757   } else if (flag == MAT_GLOBAL_SUM) {
1758     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1759     info->nz_used      = irecv[0];
1760     info->nz_allocated = irecv[1];
1761     info->nz_unneeded  = irecv[2];
1762     info->memory       = irecv[3];
1763     info->mallocs      = irecv[4];
1764   }
1765   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1766   info->fill_ratio_needed = 0;
1767   info->factor_mallocs    = 0;
1768 
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSetOption_MPIAIJ"
1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1775 {
1776   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   switch (op) {
1781   case MAT_NEW_NONZERO_LOCATIONS:
1782   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1783   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1784   case MAT_KEEP_NONZERO_PATTERN:
1785   case MAT_NEW_NONZERO_LOCATION_ERR:
1786   case MAT_USE_INODES:
1787   case MAT_IGNORE_ZERO_ENTRIES:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_ROW_ORIENTED:
1792     a->roworiented = flg;
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_NEW_DIAGONALS:
1797     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1798     break;
1799   case MAT_IGNORE_OFF_PROC_ENTRIES:
1800     a->donotstash = flg;
1801     break;
1802   case MAT_SPD:
1803     A->spd_set                         = PETSC_TRUE;
1804     A->spd                             = flg;
1805     if (flg) {
1806       A->symmetric                     = PETSC_TRUE;
1807       A->structurally_symmetric        = PETSC_TRUE;
1808       A->symmetric_set                 = PETSC_TRUE;
1809       A->structurally_symmetric_set    = PETSC_TRUE;
1810     }
1811     break;
1812   case MAT_SYMMETRIC:
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     break;
1815   case MAT_STRUCTURALLY_SYMMETRIC:
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     break;
1818   case MAT_HERMITIAN:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_SYMMETRY_ETERNAL:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   default:
1825     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatGetRow_MPIAIJ"
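/*
   Returns a locally owned row with its column indices in increasing global
   order: the pieces from the diagonal block A (columns shifted by cstart) and the
   off-diagonal block B (columns mapped through garray) are merged into the work
   arrays rowvalues/rowindices.

   Typical caller-side usage, as a sketch of the public interface (not specific to
   this implementation):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... use cols and vals ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/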
1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1833 {
1834   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1835   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1836   PetscErrorCode ierr;
1837   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1838   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1839   PetscInt       *cmap,*idx_p;
1840 
1841   PetscFunctionBegin;
1842   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1843   mat->getrowactive = PETSC_TRUE;
1844 
1845   if (!mat->rowvalues && (idx || v)) {
1846     /*
1847         allocate enough space to hold information from the longest row.
1848     */
1849     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1850     PetscInt   max = 1,tmp;
1851     for (i=0; i<matin->rmap->n; i++) {
1852       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1853       if (max < tmp) { max = tmp; }
1854     }
1855     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1856   }
1857 
1858   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1859   lrow = row - rstart;
1860 
1861   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1862   if (!v)   {pvA = 0; pvB = 0;}
1863   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1864   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1865   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1866   nztot = nzA + nzB;
1867 
1868   cmap  = mat->garray;
1869   if (v  || idx) {
1870     if (nztot) {
1871       /* Sort by increasing column numbers, assuming A and B already sorted */
1872       PetscInt imark = -1;
1873       if (v) {
1874         *v = v_p = mat->rowvalues;
1875         for (i=0; i<nzB; i++) {
1876           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1877           else break;
1878         }
1879         imark = i;
1880         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1881         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1882       }
1883       if (idx) {
1884         *idx = idx_p = mat->rowindices;
1885         if (imark > -1) {
1886           for (i=0; i<imark; i++) {
1887             idx_p[i] = cmap[cworkB[i]];
1888           }
1889         } else {
1890           for (i=0; i<nzB; i++) {
1891             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1892             else break;
1893           }
1894           imark = i;
1895         }
1896         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1897         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1898       }
1899     } else {
1900       if (idx) *idx = 0;
1901       if (v)   *v   = 0;
1902     }
1903   }
1904   *nz = nztot;
1905   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1906   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1913 {
1914   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1915 
1916   PetscFunctionBegin;
1917   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1918   aij->getrowactive = PETSC_FALSE;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatNorm_MPIAIJ"
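/*
   Frobenius norm: sum of |a_ij|^2 over both blocks, MPI_SUM reduction, then the
   square root.  1-norm: column sums of |a_ij| accumulated into a global-length
   array and reduced with MPI_SUM; the largest entry is the norm.  Infinity norm:
   row sums of |a_ij| over both blocks, reduced with MPI_MAX.  The 2-norm is not
   supported.
*/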
1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1925 {
1926   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1927   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1928   PetscErrorCode ierr;
1929   PetscInt       i,j,cstart = mat->cmap->rstart;
1930   PetscReal      sum = 0.0;
1931   MatScalar      *v;
1932 
1933   PetscFunctionBegin;
1934   if (aij->size == 1) {
1935     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1936   } else {
1937     if (type == NORM_FROBENIUS) {
1938       v = amat->a;
1939       for (i=0; i<amat->nz; i++) {
1940 #if defined(PETSC_USE_COMPLEX)
1941         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1942 #else
1943         sum += (*v)*(*v); v++;
1944 #endif
1945       }
1946       v = bmat->a;
1947       for (i=0; i<bmat->nz; i++) {
1948 #if defined(PETSC_USE_COMPLEX)
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950 #else
1951         sum += (*v)*(*v); v++;
1952 #endif
1953       }
1954       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1955       *norm = PetscSqrtReal(*norm);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1960       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1961       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1962       *norm = 0.0;
1963       v = amat->a; jj = amat->j;
1964       for (j=0; j<amat->nz; j++) {
1965         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1966       }
1967       v = bmat->a; jj = bmat->j;
1968       for (j=0; j<bmat->nz; j++) {
1969         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1970       }
1971       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1972       for (j=0; j<mat->cmap->N; j++) {
1973         if (tmp2[j] > *norm) *norm = tmp2[j];
1974       }
1975       ierr = PetscFree(tmp);CHKERRQ(ierr);
1976       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1992     } else {
1993       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1994     }
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatTranspose_MPIAIJ"
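/*
   Forms the transpose by inserting each local row of the diagonal block A (with
   global column indices) and of the off-diagonal block B (mapped through garray)
   as a column of the result via MatSetValues().  The diagonal-block preallocation
   d_nnz is computed exactly; the off-diagonal counts are only approximated by the
   same array (see the comment below), so some mallocs may occur during assembly.
*/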
2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2002 {
2003   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2004   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2005   PetscErrorCode ierr;
2006   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2007   PetscInt       cstart=A->cmap->rstart,ncol;
2008   Mat            B;
2009   MatScalar      *array;
2010 
2011   PetscFunctionBegin;
2012   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2013 
2014   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2015   ai = Aloc->i; aj = Aloc->j;
2016   bi = Bloc->i; bj = Bloc->j;
2017   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2018     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2019     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2020     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<ai[ma]; i++){
2022       d_nnz[aj[i]] ++;
2023       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2024     }
2025 
2026     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2027     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2028     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2029     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2030     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2031   } else {
2032     B = *matout;
2033   }
2034 
2035   /* copy over the A part */
2036   array = Aloc->a;
2037   row = A->rmap->rstart;
2038   for (i=0; i<ma; i++) {
2039     ncol = ai[i+1]-ai[i];
2040     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2041     row++; array += ncol; aj += ncol;
2042   }
2043   aj = Aloc->j;
2044   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2045 
2046   /* copy over the B part */
2047   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2048   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2049   array = Bloc->a;
2050   row = A->rmap->rstart;
2051   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2052   cols_tmp = cols;
2053   for (i=0; i<mb; i++) {
2054     ncol = bi[i+1]-bi[i];
2055     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2056     row++; array += ncol; cols_tmp += ncol;
2057   }
2058   ierr = PetscFree(cols);CHKERRQ(ierr);
2059 
2060   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2061   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2063     *matout = B;
2064   } else {
2065     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2066   }
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
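/*
   Computes mat = diag(ll) * mat * diag(rr).  Left scaling is applied directly to
   both blocks; right scaling of the off-diagonal block first requires scattering
   rr into the ghosted vector lvec, which is overlapped with scaling the diagonal
   block.
*/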
2072 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2073 {
2074   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2075   Mat            a = aij->A,b = aij->B;
2076   PetscErrorCode ierr;
2077   PetscInt       s1,s2,s3;
2078 
2079   PetscFunctionBegin;
2080   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2081   if (rr) {
2082     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2083     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2084     /* Overlap communication with computation. */
2085     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2086   }
2087   if (ll) {
2088     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2089     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2090     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2091   }
2092   /* scale  the diagonal block */
2093   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2094 
2095   if (rr) {
2096     /* Do a scatter end and then right scale the off-diagonal block */
2097     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2098     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2099   }
2100 
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2106 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2107 {
2108   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2109   PetscErrorCode ierr;
2110 
2111   PetscFunctionBegin;
2112   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2113   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2114   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2115   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2121 {
2122   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2123   PetscErrorCode ierr;
2124 
2125   PetscFunctionBegin;
2126   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatEqual_MPIAIJ"
2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2133 {
2134   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2135   Mat            a,b,c,d;
2136   PetscBool      flg;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   a = matA->A; b = matA->B;
2141   c = matB->A; d = matB->B;
2142 
2143   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2146   }
2147   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatCopy_MPIAIJ"
2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2154 {
2155   PetscErrorCode ierr;
2156   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2157   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2158 
2159   PetscFunctionBegin;
2160   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2161   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2162     /* because of the column compression in the off-processor part of the matrix a->B,
2163        the number of columns in a->B and b->B may be different, hence we cannot call
2164        the MatCopy() directly on the two parts. If need be, we can provide a more
2165        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2166        then copying the submatrices */
2167     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2168   } else {
2169     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2170     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2177 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2178 {
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatAXPY_MPIAIJ"
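/*
   Y = a*X + Y.  With SAME_NONZERO_PATTERN the values of both blocks are combined
   directly with a BLAS axpy.  With SUBSET_NONZERO_PATTERN the diagonal blocks use
   MatAXPY_SeqAIJ() and the off-diagonal blocks use a cached X-to-Y index map
   (xtoy), since the two B blocks may compress their columns differently.
   Otherwise a new matrix preallocated for the union of the two nonzero patterns
   is built and Y's header is replaced with it.
*/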
2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2189 {
2190   PetscErrorCode ierr;
2191   PetscInt       i;
2192   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2193   PetscBLASInt   bnz,one=1;
2194   Mat_SeqAIJ     *x,*y;
2195 
2196   PetscFunctionBegin;
2197   if (str == SAME_NONZERO_PATTERN) {
2198     PetscScalar alpha = a;
2199     x = (Mat_SeqAIJ *)xx->A->data;
2200     y = (Mat_SeqAIJ *)yy->A->data;
2201     bnz = PetscBLASIntCast(x->nz);
2202     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2203     x = (Mat_SeqAIJ *)xx->B->data;
2204     y = (Mat_SeqAIJ *)yy->B->data;
2205     bnz = PetscBLASIntCast(x->nz);
2206     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2207   } else if (str == SUBSET_NONZERO_PATTERN) {
2208     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2209 
2210     x = (Mat_SeqAIJ *)xx->B->data;
2211     y = (Mat_SeqAIJ *)yy->B->data;
2212     if (y->xtoy && y->XtoY != xx->B) {
2213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2214       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2215     }
2216     if (!y->xtoy) { /* get xtoy */
2217       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2218       y->XtoY = xx->B;
2219       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2220     }
2221     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2222   } else {
2223     Mat B;
2224     PetscInt *nnz_d,*nnz_o;
2225     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2226     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2227     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2228     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2229     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2230     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2231     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2232     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2233     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2234     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2235     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2236     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2237     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2238   }
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatConjugate_MPIAIJ"
2246 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2247 {
2248 #if defined(PETSC_USE_COMPLEX)
2249   PetscErrorCode ierr;
2250   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2254   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2255 #else
2256   PetscFunctionBegin;
2257 #endif
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatRealPart_MPIAIJ"
2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2264 {
2265   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2266   PetscErrorCode ierr;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2270   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2276 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2277 {
2278   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2283   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #ifdef PETSC_HAVE_PBGL
2288 
2289 #include <boost/parallel/mpi/bsp_process_group.hpp>
2290 #include <boost/graph/distributed/ilu_default_graph.hpp>
2291 #include <boost/graph/distributed/ilu_0_block.hpp>
2292 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2293 #include <boost/graph/distributed/petsc/interface.hpp>
2294 #include <boost/multi_array.hpp>
2295 #include <boost/parallel/distributed_property_map.hpp>
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2299 /*
2300   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2301 */
2302 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2303 {
2304   namespace petsc = boost::distributed::petsc;
2305 
2306   namespace graph_dist = boost::graph::distributed;
2307   using boost::graph::distributed::ilu_default::process_group_type;
2308   using boost::graph::ilu_permuted;
2309 
2310   PetscBool       row_identity, col_identity;
2311   PetscContainer  c;
2312   PetscInt        m, n, M, N;
2313   PetscErrorCode  ierr;
2314 
2315   PetscFunctionBegin;
2316   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2317   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2318   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2319   if (!row_identity || !col_identity) {
2320     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2321   }
2322 
2323   process_group_type pg;
2324   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2325   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326   lgraph_type&   level_graph = *lgraph_p;
2327   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2328 
2329   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330   ilu_permuted(level_graph);
2331 
2332   /* put together the new matrix */
2333   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2334   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2338   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340 
2341   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2342   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2343   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2344   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2350 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2351 {
2352   PetscFunctionBegin;
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatSolve_MPIAIJ"
2358 /*
2359   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2360 */
2361 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2362 {
2363   namespace graph_dist = boost::graph::distributed;
2364 
2365   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2366   lgraph_type*   lgraph_p;
2367   PetscContainer c;
2368   PetscErrorCode ierr;
2369 
2370   PetscFunctionBegin;
2371   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2372   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2373   ierr = VecCopy(b, x);CHKERRQ(ierr);
2374 
2375   PetscScalar* array_x;
2376   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2377   PetscInt sx;
2378   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2379 
2380   PetscScalar* array_b;
2381   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2382   PetscInt sb;
2383   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2384 
2385   lgraph_type&   level_graph = *lgraph_p;
2386   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2387 
2388   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2389   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2390                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2391 
2392   typedef boost::iterator_property_map<array_ref_type::iterator,
2393                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2394   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2395                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2396 
2397   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2398 
2399   PetscFunctionReturn(0);
2400 }
2401 #endif
2402 
2403 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2404   PetscInt       nzlocal,nsends,nrecvs;
2405   PetscMPIInt    *send_rank,*recv_rank;
2406   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2407   PetscScalar    *sbuf_a,**rbuf_a;
2408   PetscErrorCode (*Destroy)(Mat);
2409 } Mat_Redundant;
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
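/*
   Destroy callback for the "Mat_Redundant" container: frees the cached
   communication pattern and the send/receive buffers created by
   MatGetRedundantMatrix_MPIAIJ().
*/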
2413 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2414 {
2415   PetscErrorCode       ierr;
2416   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2417   PetscInt             i;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2421   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2422   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2423   for (i=0; i<redund->nrecvs; i++){
2424     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2425     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2426   }
2427   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2428   ierr = PetscFree(redund);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatDestroy_MatRedundant"
2434 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2435 {
2436   PetscErrorCode  ierr;
2437   PetscContainer  container;
2438   Mat_Redundant   *redund=PETSC_NULL;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2442   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2443   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2444   A->ops->destroy = redund->Destroy;
2445   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2446   if (A->ops->destroy) {
2447     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2448   }
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
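/*
   Builds, on each subcommunicator, a copy of the entire matrix.  Every process
   packs its local rows in a CSR-like layout (row pointers and column indices in
   sbuf_j, values in sbuf_a), exchanges them with the corresponding processes of
   the other subcommunicators using nonblocking sends and receives, and the
   received rows are then used to assemble the redundant matrix.  With
   MAT_REUSE_MATRIX the communication pattern and index buffers are taken from the
   "Mat_Redundant" container attached to the output matrix, so only the values are
   exchanged again.
*/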
2454 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2455 {
2456   PetscMPIInt    rank,size;
2457   MPI_Comm       comm=((PetscObject)mat)->comm;
2458   PetscErrorCode ierr;
2459   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2460   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2461   PetscInt       *rowrange=mat->rmap->range;
2462   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2463   Mat            A=aij->A,B=aij->B,C=*matredundant;
2464   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2465   PetscScalar    *sbuf_a;
2466   PetscInt       nzlocal=a->nz+b->nz;
2467   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2468   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2469   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2470   MatScalar      *aworkA,*aworkB;
2471   PetscScalar    *vals;
2472   PetscMPIInt    tag1,tag2,tag3,imdex;
2473   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2474                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2475   MPI_Status     recv_status,*send_status;
2476   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2477   PetscInt       **rbuf_j=PETSC_NULL;
2478   PetscScalar    **rbuf_a=PETSC_NULL;
2479   Mat_Redundant  *redund=PETSC_NULL;
2480   PetscContainer container;
2481 
2482   PetscFunctionBegin;
2483   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2484   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2485 
2486   if (reuse == MAT_REUSE_MATRIX) {
2487     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2488     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2489     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2490     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2491     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2492     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2493     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2494     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2495 
2496     nsends    = redund->nsends;
2497     nrecvs    = redund->nrecvs;
2498     send_rank = redund->send_rank;
2499     recv_rank = redund->recv_rank;
2500     sbuf_nz   = redund->sbuf_nz;
2501     rbuf_nz   = redund->rbuf_nz;
2502     sbuf_j    = redund->sbuf_j;
2503     sbuf_a    = redund->sbuf_a;
2504     rbuf_j    = redund->rbuf_j;
2505     rbuf_a    = redund->rbuf_a;
2506   }
2507 
2508   if (reuse == MAT_INITIAL_MATRIX){
2509     PetscMPIInt  subrank,subsize;
2510     PetscInt     nleftover,np_subcomm;
2511     /* get the destination processors' id send_rank, nsends and nrecvs */
2512     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2513     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2514     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2515     np_subcomm = size/nsubcomm;
2516     nleftover  = size - nsubcomm*np_subcomm;
2517     nsends = 0; nrecvs = 0;
2518     for (i=0; i<size; i++){ /* i=rank*/
2519       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2520         send_rank[nsends] = i; nsends++;
2521         recv_rank[nrecvs++] = i;
2522       }
2523     }
2524     if (rank >= size - nleftover){/* this proc is a leftover processor */
2525       i = size-nleftover-1;
2526       j = 0;
2527       while (j < nsubcomm - nleftover){
2528         send_rank[nsends++] = i;
2529         i--; j++;
2530       }
2531     }
2532 
2533     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2534       for (i=0; i<nleftover; i++){
2535         recv_rank[nrecvs++] = size-nleftover+i;
2536       }
2537     }
2538 
2539     /* allocate sbuf_j, sbuf_a */
2540     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2541     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2542     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2543   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2544 
2545   /* copy mat's local entries into the buffers */
2546   if (reuse == MAT_INITIAL_MATRIX){
2547     rownz_max = 0;
2548     rptr = sbuf_j;
2549     cols = sbuf_j + rend-rstart + 1;
2550     vals = sbuf_a;
2551     rptr[0] = 0;
2552     for (i=0; i<rend-rstart; i++){
2553       row = i + rstart;
2554       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2555       ncols  = nzA + nzB;
2556       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2557       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2558       /* load the column indices for this row into cols */
2559       lwrite = 0;
2560       for (l=0; l<nzB; l++) {
2561         if ((ctmp = bmap[cworkB[l]]) < cstart){
2562           vals[lwrite]   = aworkB[l];
2563           cols[lwrite++] = ctmp;
2564         }
2565       }
2566       for (l=0; l<nzA; l++){
2567         vals[lwrite]   = aworkA[l];
2568         cols[lwrite++] = cstart + cworkA[l];
2569       }
2570       for (l=0; l<nzB; l++) {
2571         if ((ctmp = bmap[cworkB[l]]) >= cend){
2572           vals[lwrite]   = aworkB[l];
2573           cols[lwrite++] = ctmp;
2574         }
2575       }
2576       vals += ncols;
2577       cols += ncols;
2578       rptr[i+1] = rptr[i] + ncols;
2579       if (rownz_max < ncols) rownz_max = ncols;
2580     }
2581     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2582   } else { /* only copy matrix values into sbuf_a */
2583     rptr = sbuf_j;
2584     vals = sbuf_a;
2585     rptr[0] = 0;
2586     for (i=0; i<rend-rstart; i++){
2587       row = i + rstart;
2588       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2589       ncols  = nzA + nzB;
2590       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2591       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2592       lwrite = 0;
2593       for (l=0; l<nzB; l++) {
2594         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2595       }
2596       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2597       for (l=0; l<nzB; l++) {
2598         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2599       }
2600       vals += ncols;
2601       rptr[i+1] = rptr[i] + ncols;
2602     }
2603   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2604 
2605   /* send nzlocal to others, and recv other's nzlocal */
2606   /*--------------------------------------------------*/
2607   if (reuse == MAT_INITIAL_MATRIX){
2608     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2609     s_waits2 = s_waits3 + nsends;
2610     s_waits1 = s_waits2 + nsends;
2611     r_waits1 = s_waits1 + nsends;
2612     r_waits2 = r_waits1 + nrecvs;
2613     r_waits3 = r_waits2 + nrecvs;
2614   } else {
2615     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2616     r_waits3 = s_waits3 + nsends;
2617   }
2618 
2619   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2620   if (reuse == MAT_INITIAL_MATRIX){
2621     /* get new tags to keep the communication clean */
2622     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2623     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2624     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2625 
2626     /* post receives of other's nzlocal */
2627     for (i=0; i<nrecvs; i++){
2628       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2629     }
2630     /* send nzlocal to others */
2631     for (i=0; i<nsends; i++){
2632       sbuf_nz[i] = nzlocal;
2633       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2634     }
2635     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2636     count = nrecvs;
2637     while (count) {
2638       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2639       recv_rank[imdex] = recv_status.MPI_SOURCE;
2640       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2641       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2642 
2643       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2644       rbuf_nz[imdex] += i + 2;
2645       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2646       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2647       count--;
2648     }
2649     /* wait on sends of nzlocal */
2650     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2651     /* send mat->i,j to others, and recv from other's */
2652     /*------------------------------------------------*/
2653     for (i=0; i<nsends; i++){
2654       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2655       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2656     }
2657     /* wait on receives of mat->i,j */
2658     /*------------------------------*/
2659     count = nrecvs;
2660     while (count) {
2661       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2662       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2663       count--;
2664     }
2665     /* wait on sends of mat->i,j */
2666     /*---------------------------*/
2667     if (nsends) {
2668       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2669     }
2670   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2671 
2672   /* post receives, send and receive mat->a */
2673   /*----------------------------------------*/
2674   for (imdex=0; imdex<nrecvs; imdex++) {
2675     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2676   }
2677   for (i=0; i<nsends; i++){
2678     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2679   }
2680   count = nrecvs;
2681   while (count) {
2682     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2683     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684     count--;
2685   }
2686   if (nsends) {
2687     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2688   }
2689 
2690   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2691 
2692   /* create redundant matrix */
2693   /*-------------------------*/
2694   if (reuse == MAT_INITIAL_MATRIX){
2695     /* compute rownz_max for preallocation */
2696     for (imdex=0; imdex<nrecvs; imdex++){
2697       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2698       rptr = rbuf_j[imdex];
2699       for (i=0; i<j; i++){
2700         ncols = rptr[i+1] - rptr[i];
2701         if (rownz_max < ncols) rownz_max = ncols;
2702       }
2703     }
2704 
2705     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2706     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2707     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2708     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2709     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2710   } else {
2711     C = *matredundant;
2712   }
2713 
2714   /* insert local matrix entries */
2715   rptr = sbuf_j;
2716   cols = sbuf_j + rend-rstart + 1;
2717   vals = sbuf_a;
2718   for (i=0; i<rend-rstart; i++){
2719     row   = i + rstart;
2720     ncols = rptr[i+1] - rptr[i];
2721     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2722     vals += ncols;
2723     cols += ncols;
2724   }
2725   /* insert received matrix entries */
2726   for (imdex=0; imdex<nrecvs; imdex++){
2727     rstart = rowrange[recv_rank[imdex]];
2728     rend   = rowrange[recv_rank[imdex]+1];
2729     rptr = rbuf_j[imdex];
2730     cols = rbuf_j[imdex] + rend-rstart + 1;
2731     vals = rbuf_a[imdex];
2732     for (i=0; i<rend-rstart; i++){
2733       row   = i + rstart;
2734       ncols = rptr[i+1] - rptr[i];
2735       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2736       vals += ncols;
2737       cols += ncols;
2738     }
2739   }
2740   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2741   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2742   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2743   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2744   if (reuse == MAT_INITIAL_MATRIX) {
2745     PetscContainer container;
2746     *matredundant = C;
2747     /* create a supporting struct and attach it to C for reuse */
2748     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2749     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2750     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2751     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2752     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2753     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2754 
2755     redund->nzlocal = nzlocal;
2756     redund->nsends  = nsends;
2757     redund->nrecvs  = nrecvs;
2758     redund->send_rank = send_rank;
2759     redund->recv_rank = recv_rank;
2760     redund->sbuf_nz = sbuf_nz;
2761     redund->rbuf_nz = rbuf_nz;
2762     redund->sbuf_j  = sbuf_j;
2763     redund->sbuf_a  = sbuf_a;
2764     redund->rbuf_j  = rbuf_j;
2765     redund->rbuf_a  = rbuf_a;
2766 
2767     redund->Destroy = C->ops->destroy;
2768     C->ops->destroy = MatDestroy_MatRedundant;
2769   }
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2775 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2776 {
2777   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2778   PetscErrorCode ierr;
2779   PetscInt       i,*idxb = 0;
2780   PetscScalar    *va,*vb;
2781   Vec            vtmp;
2782 
2783   PetscFunctionBegin;
2784   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2785   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2786   if (idx) {
2787     for (i=0; i<A->rmap->n; i++) {
2788       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2789     }
2790   }
2791 
2792   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2793   if (idx) {
2794     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2795   }
2796   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2797   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2798 
2799   for (i=0; i<A->rmap->n; i++){
2800     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2801       va[i] = vb[i];
2802       if (idx) idx[i] = a->garray[idxb[i]];
2803     }
2804   }
2805 
2806   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2807   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2808   ierr = PetscFree(idxb);CHKERRQ(ierr);
2809   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 #undef __FUNCT__
2814 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2815 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2816 {
2817   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2818   PetscErrorCode ierr;
2819   PetscInt       i,*idxb = 0;
2820   PetscScalar    *va,*vb;
2821   Vec            vtmp;
2822 
2823   PetscFunctionBegin;
2824   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2825   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2826   if (idx) {
2827     for (i=0; i<A->rmap->n; i++) {
2828       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2829     }
2830   }
2831 
2832   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2833   if (idx) {
2834     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2835   }
2836   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2837   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2838 
2839   for (i=0; i<A->rmap->n; i++){
2840     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2841       va[i] = vb[i];
2842       if (idx) idx[i] = a->garray[idxb[i]];
2843     }
2844   }
2845 
2846   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2847   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2848   ierr = PetscFree(idxb);CHKERRQ(ierr);
2849   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2855 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2856 {
2857   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2858   PetscInt       n      = A->rmap->n;
2859   PetscInt       cstart = A->cmap->rstart;
2860   PetscInt      *cmap   = mat->garray;
2861   PetscInt      *diagIdx, *offdiagIdx;
2862   Vec            diagV, offdiagV;
2863   PetscScalar   *a, *diagA, *offdiagA;
2864   PetscInt       r;
2865   PetscErrorCode ierr;
2866 
2867   PetscFunctionBegin;
2868   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2869   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2870   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2871   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2872   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2873   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2874   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2875   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2876   for(r = 0; r < n; ++r) {
2877     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2878       a[r]   = diagA[r];
2879       idx[r] = cstart + diagIdx[r];
2880     } else {
2881       a[r]   = offdiagA[r];
2882       idx[r] = cmap[offdiagIdx[r]];
2883     }
2884   }
2885   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2886   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2887   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2888   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2889   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2890   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2896 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2897 {
2898   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2899   PetscInt       n      = A->rmap->n;
2900   PetscInt       cstart = A->cmap->rstart;
2901   PetscInt      *cmap   = mat->garray;
2902   PetscInt      *diagIdx, *offdiagIdx;
2903   Vec            diagV, offdiagV;
2904   PetscScalar   *a, *diagA, *offdiagA;
2905   PetscInt       r;
2906   PetscErrorCode ierr;
2907 
2908   PetscFunctionBegin;
2909   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2910   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2911   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2912   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2913   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2914   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2915   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2916   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2917   for(r = 0; r < n; ++r) {
2918     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2919       a[r]   = diagA[r];
2920       idx[r] = cstart + diagIdx[r];
2921     } else {
2922       a[r]   = offdiagA[r];
2923       idx[r] = cmap[offdiagIdx[r]];
2924     }
2925   }
2926   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2928   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2929   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2930   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2931   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2937 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2938 {
2939   PetscErrorCode ierr;
2940   Mat            *dummy;
2941 
2942   PetscFunctionBegin;
2943   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2944   *newmat = *dummy;
2945   ierr = PetscFree(dummy);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2950 
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2953 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2954 {
2955   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 
2964 /* -------------------------------------------------------------------*/
2965 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2966        MatGetRow_MPIAIJ,
2967        MatRestoreRow_MPIAIJ,
2968        MatMult_MPIAIJ,
2969 /* 4*/ MatMultAdd_MPIAIJ,
2970        MatMultTranspose_MPIAIJ,
2971        MatMultTransposeAdd_MPIAIJ,
2972 #ifdef PETSC_HAVE_PBGL
2973        MatSolve_MPIAIJ,
2974 #else
2975        0,
2976 #endif
2977        0,
2978        0,
2979 /*10*/ 0,
2980        0,
2981        0,
2982        MatSOR_MPIAIJ,
2983        MatTranspose_MPIAIJ,
2984 /*15*/ MatGetInfo_MPIAIJ,
2985        MatEqual_MPIAIJ,
2986        MatGetDiagonal_MPIAIJ,
2987        MatDiagonalScale_MPIAIJ,
2988        MatNorm_MPIAIJ,
2989 /*20*/ MatAssemblyBegin_MPIAIJ,
2990        MatAssemblyEnd_MPIAIJ,
2991        MatSetOption_MPIAIJ,
2992        MatZeroEntries_MPIAIJ,
2993 /*24*/ MatZeroRows_MPIAIJ,
2994        0,
2995 #ifdef PETSC_HAVE_PBGL
2996        0,
2997 #else
2998        0,
2999 #endif
3000        0,
3001        0,
3002 /*29*/ MatSetUpPreallocation_MPIAIJ,
3003 #ifdef PETSC_HAVE_PBGL
3004        0,
3005 #else
3006        0,
3007 #endif
3008        0,
3009        0,
3010        0,
3011 /*34*/ MatDuplicate_MPIAIJ,
3012        0,
3013        0,
3014        0,
3015        0,
3016 /*39*/ MatAXPY_MPIAIJ,
3017        MatGetSubMatrices_MPIAIJ,
3018        MatIncreaseOverlap_MPIAIJ,
3019        MatGetValues_MPIAIJ,
3020        MatCopy_MPIAIJ,
3021 /*44*/ MatGetRowMax_MPIAIJ,
3022        MatScale_MPIAIJ,
3023        0,
3024        0,
3025        MatZeroRowsColumns_MPIAIJ,
3026 /*49*/ MatSetBlockSize_MPIAIJ,
3027        0,
3028        0,
3029        0,
3030        0,
3031 /*54*/ MatFDColoringCreate_MPIAIJ,
3032        0,
3033        MatSetUnfactored_MPIAIJ,
3034        MatPermute_MPIAIJ,
3035        0,
3036 /*59*/ MatGetSubMatrix_MPIAIJ,
3037        MatDestroy_MPIAIJ,
3038        MatView_MPIAIJ,
3039        0,
3040        0,
3041 /*64*/ 0,
3042        0,
3043        0,
3044        0,
3045        0,
3046 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3047        MatGetRowMinAbs_MPIAIJ,
3048        0,
3049        MatSetColoring_MPIAIJ,
3050 #if defined(PETSC_HAVE_ADIC)
3051        MatSetValuesAdic_MPIAIJ,
3052 #else
3053        0,
3054 #endif
3055        MatSetValuesAdifor_MPIAIJ,
3056 /*75*/ MatFDColoringApply_AIJ,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*80*/ 0,
3062        0,
3063        0,
3064 /*83*/ MatLoad_MPIAIJ,
3065        0,
3066        0,
3067        0,
3068        0,
3069        0,
3070 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3071        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3072        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3073        MatPtAP_Basic,
3074        MatPtAPSymbolic_MPIAIJ,
3075 /*94*/ MatPtAPNumeric_MPIAIJ,
3076        0,
3077        0,
3078        0,
3079        0,
3080 /*99*/ 0,
3081        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3082        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3083        MatConjugate_MPIAIJ,
3084        0,
3085 /*104*/MatSetValuesRow_MPIAIJ,
3086        MatRealPart_MPIAIJ,
3087        MatImaginaryPart_MPIAIJ,
3088        0,
3089        0,
3090 /*109*/0,
3091        MatGetRedundantMatrix_MPIAIJ,
3092        MatGetRowMin_MPIAIJ,
3093        0,
3094        0,
3095 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3096        0,
3097        0,
3098        0,
3099        0,
3100 /*119*/0,
3101        0,
3102        0,
3103        0,
3104        MatGetMultiProcBlock_MPIAIJ,
3105 /*124*/MatFindNonZeroRows_MPIAIJ,
3106        MatGetColumnNorms_MPIAIJ,
3107        MatInvertBlockDiagonal_MPIAIJ,
3108        0,
3109        MatGetSubMatricesParallel_MPIAIJ,
3110 /*129*/0,
3111        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3112        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3113        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3114        0,
3115 /*134*/0,
3116        0,
3117        0,
3118        0,
3119        0
3120 };
3121 
3122 /* ----------------------------------------------------------------------------------------*/
3123 
3124 EXTERN_C_BEGIN
3125 #undef __FUNCT__
3126 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3127 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3128 {
3129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3130   PetscErrorCode ierr;
3131 
3132   PetscFunctionBegin;
3133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 EXTERN_C_END
3138 
3139 EXTERN_C_BEGIN
3140 #undef __FUNCT__
3141 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3142 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3143 {
3144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3145   PetscErrorCode ierr;
3146 
3147   PetscFunctionBegin;
3148   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3149   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3150   PetscFunctionReturn(0);
3151 }
3152 EXTERN_C_END
3153 
3154 EXTERN_C_BEGIN
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3157 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3158 {
3159   Mat_MPIAIJ     *b;
3160   PetscErrorCode ierr;
3161   PetscInt       i;
3162 
3163   PetscFunctionBegin;
3164   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3165   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3166   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3167   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3168 
3169   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3170   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3171   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3172   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3173   if (d_nnz) {
3174     for (i=0; i<B->rmap->n; i++) {
3175       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3176     }
3177   }
3178   if (o_nnz) {
3179     for (i=0; i<B->rmap->n; i++) {
3180       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3181     }
3182   }
3183   b = (Mat_MPIAIJ*)B->data;
3184 
3185   if (!B->preallocated) {
3186     /* Explicitly create 2 MATSEQAIJ matrices. */
3187     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3188     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3189     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3190     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3191     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3192     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3193     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3194     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3195   }
3196 
3197   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3198   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3199   B->preallocated = PETSC_TRUE;
3200   ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 EXTERN_C_END
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3207 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3208 {
3209   Mat            mat;
3210   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   *newmat       = 0;
3215   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3216   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3217   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3218   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3219   a    = (Mat_MPIAIJ*)mat->data;
3220 
3221   mat->factortype    = matin->factortype;
3222   mat->rmap->bs      = matin->rmap->bs;
3223   mat->assembled    = PETSC_TRUE;
3224   mat->insertmode   = NOT_SET_VALUES;
3225   mat->preallocated = PETSC_TRUE;
3226 
3227   a->size           = oldmat->size;
3228   a->rank           = oldmat->rank;
3229   a->donotstash     = oldmat->donotstash;
3230   a->roworiented    = oldmat->roworiented;
3231   a->rowindices     = 0;
3232   a->rowvalues      = 0;
3233   a->getrowactive   = PETSC_FALSE;
3234 
3235   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3236   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3237 
3238   if (oldmat->colmap) {
3239 #if defined (PETSC_USE_CTABLE)
3240     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3241 #else
3242     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3243     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3244     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3245 #endif
3246   } else a->colmap = 0;
3247   if (oldmat->garray) {
3248     PetscInt len;
3249     len  = oldmat->B->cmap->n;
3250     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3251     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3252     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3253   } else a->garray = 0;
3254 
3255   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3256   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3257   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3258   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3259   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3260   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3261   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3262   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3263   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3264   *newmat = mat;
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatLoad_MPIAIJ"
3272 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3273 {
3274   PetscScalar    *vals,*svals;
3275   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3276   PetscErrorCode ierr;
3277   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3278   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3279   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3280   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3281   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3282   int            fd;
3283 
3284   PetscFunctionBegin;
3285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3286   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3287   if (!rank) {
3288     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3289     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3290     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3291   }
3292 
3293   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3294 
3295   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3296   M = header[1]; N = header[2];
3297   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3298   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3299   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3300 
3301   /* If global sizes are set, check if they are consistent with that given in the file */
3302   if (sizesset) {
3303     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3304   }
3305   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3306   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3307 
3308   /* determine ownership of all rows */
3309   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3310   else m = newMat->rmap->n; /* Set by user */
3311 
3312   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3313   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3314 
3315   /* First process needs enough room for process with most rows */
3316   if (!rank) {
3317     mmax       = rowners[1];
3318     for (i=2; i<=size; i++) {
3319       mmax = PetscMax(mmax,rowners[i]);
3320     }
3321   } else mmax = m;
3322 
3323   rowners[0] = 0;
3324   for (i=2; i<=size; i++) {
3325     rowners[i] += rowners[i-1];
3326   }
3327   rstart = rowners[rank];
3328   rend   = rowners[rank+1];
3329 
3330   /* distribute row lengths to all processors */
3331   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3332   if (!rank) {
3333     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3334     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3335     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3336     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3337     for (j=0; j<m; j++) {
3338       procsnz[0] += ourlens[j];
3339     }
3340     for (i=1; i<size; i++) {
3341       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3342       /* calculate the number of nonzeros on each processor */
3343       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3344         procsnz[i] += rowlengths[j];
3345       }
3346       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3347     }
3348     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3349   } else {
3350     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3351   }
3352 
3353   if (!rank) {
3354     /* determine max buffer needed and allocate it */
3355     maxnz = 0;
3356     for (i=0; i<size; i++) {
3357       maxnz = PetscMax(maxnz,procsnz[i]);
3358     }
3359     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3360 
3361     /* read in my part of the matrix column indices  */
3362     nz   = procsnz[0];
3363     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3364     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3365 
3366     /* read in everyone else's and ship off */
3367     for (i=1; i<size; i++) {
3368       nz     = procsnz[i];
3369       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3370       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3371     }
3372     ierr = PetscFree(cols);CHKERRQ(ierr);
3373   } else {
3374     /* determine buffer space needed for message */
3375     nz = 0;
3376     for (i=0; i<m; i++) {
3377       nz += ourlens[i];
3378     }
3379     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3380 
3381     /* receive message of column indices */
3382     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3383   }
3384 
3385   /* determine column ownership if matrix is not square */
3386   if (N != M) {
3387     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3388     else n = newMat->cmap->n;
3389     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3390     cstart = cend - n;
3391   } else {
3392     cstart = rstart;
3393     cend   = rend;
3394     n      = cend - cstart;
3395   }
3396 
3397   /* loop over local rows, determining number of off diagonal entries */
3398   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3399   jj = 0;
3400   for (i=0; i<m; i++) {
3401     for (j=0; j<ourlens[i]; j++) {
3402       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3403       jj++;
3404     }
3405   }
3406 
3407   for (i=0; i<m; i++) {
3408     ourlens[i] -= offlens[i];
3409   }
3410   if (!sizesset) {
3411     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3412   }
3413   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3414 
3415   for (i=0; i<m; i++) {
3416     ourlens[i] += offlens[i];
3417   }
3418 
3419   if (!rank) {
3420     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3421 
3422     /* read in my part of the matrix numerical values  */
3423     nz   = procsnz[0];
3424     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3425 
3426     /* insert into matrix */
3427     jj      = rstart;
3428     smycols = mycols;
3429     svals   = vals;
3430     for (i=0; i<m; i++) {
3431       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3432       smycols += ourlens[i];
3433       svals   += ourlens[i];
3434       jj++;
3435     }
3436 
3437     /* read in other processors and ship out */
3438     for (i=1; i<size; i++) {
3439       nz     = procsnz[i];
3440       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3441       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3442     }
3443     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3444   } else {
3445     /* receive numeric values */
3446     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3447 
3448     /* receive message of values */
3449     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3450 
3451     /* insert into matrix */
3452     jj      = rstart;
3453     smycols = mycols;
3454     svals   = vals;
3455     for (i=0; i<m; i++) {
3456       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3457       smycols += ourlens[i];
3458       svals   += ourlens[i];
3459       jj++;
3460     }
3461   }
3462   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3463   ierr = PetscFree(vals);CHKERRQ(ierr);
3464   ierr = PetscFree(mycols);CHKERRQ(ierr);
3465   ierr = PetscFree(rowners);CHKERRQ(ierr);
3466 
3467   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3468   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3469   PetscFunctionReturn(0);
3470 }
3471 
3472 #undef __FUNCT__
3473 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3474 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3475 {
3476   PetscErrorCode ierr;
3477   IS             iscol_local;
3478   PetscInt       csize;
3479 
3480   PetscFunctionBegin;
3481   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3482   if (call == MAT_REUSE_MATRIX) {
3483     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3484     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3485   } else {
3486     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3487   }
3488   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3489   if (call == MAT_INITIAL_MATRIX) {
3490     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3491     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3492   }
3493   PetscFunctionReturn(0);
3494 }
3495 
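/*
   Illustrative sketch (not part of this revision; the #if 0 guard keeps it out of the
   build): one way MatGetSubMatrix() is typically called on an MPIAIJ matrix, first with
   MAT_INITIAL_MATRIX and then with MAT_REUSE_MATRIX, which relies on the "ISAllGather"
   object composed by MatGetSubMatrix_MPIAIJ() above.  The helper name and the choice of
   index sets (each process keeps its own rows and, assuming a square matrix with matching
   row and column layouts, its own block of columns) are assumptions made for the example.
*/
#if 0
static PetscErrorCode ExampleGetSubMatrixUsage(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             isrow,iscol;
  Mat            sub;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  /* every process keeps its locally owned rows and the matching block of columns */
  ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... change the entries of A (same nonzero pattern), then refill sub in place ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = MatDestroy(&sub);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
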
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3498 /*
3499     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3500   process, and then the end result obtained by concatenating the local matrices.
3501   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3502 
3503   Note: This requires a sequential iscol with all indices.
3504 */
3505 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3506 {
3507   PetscErrorCode ierr;
3508   PetscMPIInt    rank,size;
3509   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3510   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3511   Mat            *local,M,Mreuse;
3512   MatScalar      *vwork,*aa;
3513   MPI_Comm       comm = ((PetscObject)mat)->comm;
3514   Mat_SeqAIJ     *aij;
3515 
3516 
3517   PetscFunctionBegin;
3518   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3520 
3521   if (call ==  MAT_REUSE_MATRIX) {
3522     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3523     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3524     local = &Mreuse;
3525     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3526   } else {
3527     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3528     Mreuse = *local;
3529     ierr   = PetscFree(local);CHKERRQ(ierr);
3530   }
3531 
3532   /*
3533       m - number of local rows
3534       n - number of columns (same on all processors)
3535       rstart - first row in new global matrix generated
3536   */
3537   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3538   if (call == MAT_INITIAL_MATRIX) {
3539     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3540     ii  = aij->i;
3541     jj  = aij->j;
3542 
3543     /*
3544         Determine the number of non-zeros in the diagonal and off-diagonal
3545         portions of the matrix in order to do correct preallocation
3546     */
3547 
3548     /* first get start and end of "diagonal" columns */
3549     if (csize == PETSC_DECIDE) {
3550       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3551       if (mglobal == n) { /* square matrix */
3552         nlocal = m;
3553       } else {
3554         nlocal = n/size + ((n % size) > rank);
3555       }
3556     } else {
3557       nlocal = csize;
3558     }
3559     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3560     rstart = rend - nlocal;
3561     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3562 
3563     /* next, compute all the lengths */
3564     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3565     olens = dlens + m;
3566     for (i=0; i<m; i++) {
3567       jend = ii[i+1] - ii[i];
3568       olen = 0;
3569       dlen = 0;
3570       for (j=0; j<jend; j++) {
3571         if (*jj < rstart || *jj >= rend) olen++;
3572         else dlen++;
3573         jj++;
3574       }
3575       olens[i] = olen;
3576       dlens[i] = dlen;
3577     }
3578     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3579     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3580     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3581     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3582     ierr = PetscFree(dlens);CHKERRQ(ierr);
3583   } else {
3584     PetscInt ml,nl;
3585 
3586     M = *newmat;
3587     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3588     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3589     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3590     /*
3591          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3592        rather than the slower MatSetValues().
3593     */
3594     M->was_assembled = PETSC_TRUE;
3595     M->assembled     = PETSC_FALSE;
3596   }
3597   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3598   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3599   ii  = aij->i;
3600   jj  = aij->j;
3601   aa  = aij->a;
3602   for (i=0; i<m; i++) {
3603     row   = rstart + i;
3604     nz    = ii[i+1] - ii[i];
3605     cwork = jj;     jj += nz;
3606     vwork = aa;     aa += nz;
3607     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3608   }
3609 
3610   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612   *newmat = M;
3613 
3614   /* save submatrix used in processor for next request */
3615   if (call ==  MAT_INITIAL_MATRIX) {
3616     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3617     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3618   }
3619 
3620   PetscFunctionReturn(0);
3621 }
3622 
3623 EXTERN_C_BEGIN
3624 #undef __FUNCT__
3625 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3626 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3627 {
3628   PetscInt       m,cstart, cend,j,nnz,i,d;
3629   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3630   const PetscInt *JJ;
3631   PetscScalar    *values;
3632   PetscErrorCode ierr;
3633 
3634   PetscFunctionBegin;
3635   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3636 
3637   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3638   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3639   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3640   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3641   m      = B->rmap->n;
3642   cstart = B->cmap->rstart;
3643   cend   = B->cmap->rend;
3644   rstart = B->rmap->rstart;
3645 
3646   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3647 
3648 #if defined(PETSC_USE_DEBUG)
3649   for (i=0; i<m; i++) {
3650     nnz     = Ii[i+1]- Ii[i];
3651     JJ      = J + Ii[i];
3652     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3653     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3654     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3655   }
3656 #endif
3657 
3658   for (i=0; i<m; i++) {
3659     nnz     = Ii[i+1]- Ii[i];
3660     JJ      = J + Ii[i];
3661     nnz_max = PetscMax(nnz_max,nnz);
3662     d       = 0;
3663     for (j=0; j<nnz; j++) {
3664       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3665     }
3666     d_nnz[i] = d;
3667     o_nnz[i] = nnz - d;
3668   }
3669   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3670   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3671 
3672   if (v) values = (PetscScalar*)v;
3673   else {
3674     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3675     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3676   }
3677 
3678   for (i=0; i<m; i++) {
3679     ii   = i + rstart;
3680     nnz  = Ii[i+1]- Ii[i];
3681     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3682   }
3683   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685 
3686   if (!v) {
3687     ierr = PetscFree(values);CHKERRQ(ierr);
3688   }
3689   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3690   PetscFunctionReturn(0);
3691 }
3692 EXTERN_C_END
3693 
3694 #undef __FUNCT__
3695 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3696 /*@
3697    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3698    (the default parallel PETSc format).
3699 
3700    Collective on MPI_Comm
3701 
3702    Input Parameters:
3703 +  B - the matrix
3704 .  i - the indices into j for the start of each local row (starts with zero)
3705 .  j - the column indices for each local row (starts with zero)
3706 -  v - optional values in the matrix
3707 
3708    Level: developer
3709 
3710    Notes:
3711        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3712      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3713      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3714 
3715        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3716 
3717        The format which is used for the sparse matrix input is equivalent to a
3718     row-major ordering, i.e., for the following matrix, the input data expected is
3719     as shown:
3720 
3721         1 0 0
3722         2 0 3     P0
3723        -------
3724         4 5 6     P1
3725 
3726      Process0 [P0]: rows_owned=[0,1]
3727         i =  {0,1,3}  [size = nrow+1  = 2+1]
3728         j =  {0,0,2}  [size = nz = 3]
3729         v =  {1,2,3}  [size = nz = 3]
3730 
3731      Process1 [P1]: rows_owned=[2]
3732         i =  {0,3}    [size = nrow+1  = 1+1]
3733         j =  {0,1,2}  [size = nz = 3]
3734         v =  {4,5,6}  [size = nz = 3]
3735 
3736 .keywords: matrix, aij, compressed row, sparse, parallel
3737 
3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3739           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3740 @*/
3741 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3742 {
3743   PetscErrorCode ierr;
3744 
3745   PetscFunctionBegin;
3746   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3747   PetscFunctionReturn(0);
3748 }
3749 
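/*
   Illustrative sketch (kept out of the build by the #if 0 guard): assembling the 3 x 3
   two-process example from the manual page above with MatMPIAIJSetPreallocationCSR().
   The i/j/v arrays are exactly the P0/P1 data listed there; the helper name and the
   assumption of exactly two processes are made up for the example.
*/
#if 0
static PetscErrorCode ExamplePreallocationCSRUsage(MPI_Comm comm)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  Mat               B;
  /* process 0 owns rows 0-1, process 1 owns row 2 */
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
  const PetscScalar v0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},j1[] = {0,1,2};
  const PetscScalar v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  /* fills and assembles the matrix; no further MatAssemblyBegin/End() is needed */
  if (!rank) {
    ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);
  } else {
    ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
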
3750 #undef __FUNCT__
3751 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3752 /*@C
3753    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3754    (the default parallel PETSc format).  For good matrix assembly performance
3755    the user should preallocate the matrix storage by setting the parameters
3756    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3757    performance can be increased by more than a factor of 50.
3758 
3759    Collective on MPI_Comm
3760 
3761    Input Parameters:
3762 +  A - the matrix
3763 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3764            (same value is used for all local rows)
3765 .  d_nnz - array containing the number of nonzeros in the various rows of the
3766            DIAGONAL portion of the local submatrix (possibly different for each row)
3767            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3768            The size of this array is equal to the number of local rows, i.e 'm'.
3769            For matrices that will be factored, you must leave room for (and set)
3770            the diagonal entry even if it is zero.
3771 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3772            submatrix (same value is used for all local rows).
3773 -  o_nnz - array containing the number of nonzeros in the various rows of the
3774            OFF-DIAGONAL portion of the local submatrix (possibly different for
3775            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3776            structure. The size of this array is equal to the number
3777            of local rows, i.e 'm'.
3778 
3779    If the *_nnz parameter is given then the *_nz parameter is ignored
3780 
3781    The AIJ format (also called the Yale sparse matrix format or
3782    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3783    storage.  The stored row and column indices begin with zero.
3784    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3785 
3786    The parallel matrix is partitioned such that the first m0 rows belong to
3787    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3788    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3789 
3790    The DIAGONAL portion of the local submatrix of a processor can be defined
3791    as the submatrix which is obtained by extracting the part corresponding to
3792    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3793    first row that belongs to the processor, r2 is the last row belonging to
3794    this processor, and c1-c2 is the range of indices of the local part of a
3795    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3796    common case of a square matrix, the row and column ranges are the same and
3797    the DIAGONAL part is also square. The remaining portion of the local
3798    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3799    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3800    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3801 
3802    You can call MatGetInfo() to get information on how effective the preallocation was;
3803    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3804    You can also run with the option -info and look for messages with the string
3805    malloc in them to see if additional memory allocation was needed.
3806 
3807    Example usage:
3808 
3809    Consider the following 8x8 matrix with 34 non-zero values, that is
3810    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3811    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3812    as follows:
3813 
3814 .vb
3815             1  2  0  |  0  3  0  |  0  4
3816     Proc0   0  5  6  |  7  0  0  |  8  0
3817             9  0 10  | 11  0  0  | 12  0
3818     -------------------------------------
3819            13  0 14  | 15 16 17  |  0  0
3820     Proc1   0 18  0  | 19 20 21  |  0  0
3821             0  0  0  | 22 23  0  | 24  0
3822     -------------------------------------
3823     Proc2  25 26 27  |  0  0 28  | 29  0
3824            30  0  0  | 31 32 33  |  0 34
3825 .ve
3826 
3827    This can be represented as a collection of submatrices as:
3828 
3829 .vb
3830       A B C
3831       D E F
3832       G H I
3833 .ve
3834 
3835    Where the submatrices A,B,C are owned by proc0, D,E,F are
3836    owned by proc1, G,H,I are owned by proc2.
3837 
3838    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3839    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3840    The 'M','N' parameters are 8,8, and have the same values on all procs.
3841 
3842    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3843    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3844    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3845    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3846    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3847    matrix, and [DF] as another SeqAIJ matrix.
3848 
3849    When d_nz, o_nz parameters are specified, d_nz storage elements are
3850    allocated for every row of the local diagonal submatrix, and o_nz
3851    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3852    One way to choose d_nz and o_nz is to use the max nonzeros per local
3853    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3854    In this case, the values of d_nz,o_nz are:
3855 .vb
3856      proc0 : dnz = 2, o_nz = 2
3857      proc1 : dnz = 3, o_nz = 2
3858      proc2 : dnz = 1, o_nz = 4
3859 .ve
3860    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3861    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3862    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3863    34 values.
3864 
3865    When d_nnz, o_nnz parameters are specified, the storage is specified
3866    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3867    In the above case the values for d_nnz,o_nnz are:
3868 .vb
3869      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3870      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3871      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3872 .ve
3873    Here the space allocated is the sum of all the above values, i.e., 34, and
3874    hence pre-allocation is perfect.
3875 
3876    Level: intermediate
3877 
3878 .keywords: matrix, aij, compressed row, sparse, parallel
3879 
3880 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3881           MPIAIJ, MatGetInfo()
3882 @*/
3883 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3884 {
3885   PetscErrorCode ierr;
3886 
3887   PetscFunctionBegin;
3888   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3889   PetscValidType(B,1);
3890   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3891   PetscFunctionReturn(0);
3892 }
3893 
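/*
   Illustrative sketch (kept out of the build by the #if 0 guard): preallocating the
   8 x 8 three-process example from the manual page above using the per-row d_nnz/o_nnz
   values listed there.  Only the preallocation pattern is shown; the MatSetValues()
   calls for the 34 nonzeros are omitted.  The helper name and the assumption of exactly
   three processes are made up for the example.
*/
#if 0
static PetscErrorCode ExamplePreallocationUsage(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  Mat            A;
  const PetscInt d_nnz0[] = {2,2,2},o_nnz0[] = {2,2,2}; /* proc0: rows 0-2 */
  const PetscInt d_nnz1[] = {3,3,2},o_nnz1[] = {2,1,1}; /* proc1: rows 3-5 */
  const PetscInt d_nnz2[] = {1,1},  o_nnz2[] = {4,4};   /* proc2: rows 6-7 */
  const PetscInt *d_nnz,*o_nnz;
  PetscInt       m;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (rank == 0)      {m = 3; d_nnz = d_nnz0; o_nnz = o_nnz0;}
  else if (rank == 1) {m = 3; d_nnz = d_nnz1; o_nnz = o_nnz1;}
  else                {m = 2; d_nnz = d_nnz2; o_nnz = o_nnz2;}
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,8,8);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  /* d_nz/o_nz are ignored because the d_nnz/o_nnz arrays are provided */
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* ... MatSetValues() for the 34 entries, MatAssemblyBegin/End(), use the matrix ... */
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
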
3894 #undef __FUNCT__
3895 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3896 /*@
3897      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3898          CSR format the local rows.
3899 
3900    Collective on MPI_Comm
3901 
3902    Input Parameters:
3903 +  comm - MPI communicator
3904 .  m - number of local rows (Cannot be PETSC_DECIDE)
3905 .  n - This value should be the same as the local size used in creating the
3906        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3907        calculated if N is given) For square matrices n is almost always m.
3908 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3909 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3910 .   i - row indices
3911 .   j - column indices
3912 -   a - matrix values
3913 
3914    Output Parameter:
3915 .   mat - the matrix
3916 
3917    Level: intermediate
3918 
3919    Notes:
3920        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3921      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3922      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3923 
3924        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3925 
3926        The format which is used for the sparse matrix input is equivalent to a
3927     row-major ordering, i.e., for the following matrix, the input data expected is
3928     as shown:
3929 
3930         1 0 0
3931         2 0 3     P0
3932        -------
3933         4 5 6     P1
3934 
3935      Process0 [P0]: rows_owned=[0,1]
3936         i =  {0,1,3}  [size = nrow+1  = 2+1]
3937         j =  {0,0,2}  [size = nz = 3]
3938         v =  {1,2,3}  [size = nz = 3]
3939 
3940      Process1 [P1]: rows_owned=[2]
3941         i =  {0,3}    [size = nrow+1  = 1+1]
3942         j =  {0,1,2}  [size = nz = 3]
3943         v =  {4,5,6}  [size = nz = 3]
3944 
3945 .keywords: matrix, aij, compressed row, sparse, parallel
3946 
3947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3948           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3949 @*/
3950 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3951 {
3952   PetscErrorCode ierr;
3953 
3954   PetscFunctionBegin;
3955   if (i[0]) {
3956     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3957   }
3958   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3959   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3960   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3961   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3962   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
3965 
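/*
   Illustrative sketch (kept out of the build by the #if 0 guard): the one-call equivalent
   of the MatMPIAIJSetPreallocationCSR() sketch earlier in this file, creating the same
   two-process 3 x 3 example directly with MatCreateMPIAIJWithArrays().  The helper name
   and the assumption of exactly two processes are made up for the example.
*/
#if 0
static PetscErrorCode ExampleCreateWithArraysUsage(MPI_Comm comm,Mat *mat)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
  const PetscScalar a0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},j1[] = {0,1,2};
  const PetscScalar a1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* each process passes only the CSR data for the rows it owns */
  if (!rank) {
    ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,a0,mat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,a1,mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
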
3966 #undef __FUNCT__
3967 #define __FUNCT__ "MatCreateMPIAIJ"
3968 /*@C
3969    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3970    (the default parallel PETSc format).  For good matrix assembly performance
3971    the user should preallocate the matrix storage by setting the parameters
3972    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3973    performance can be increased by more than a factor of 50.
3974 
3975    Collective on MPI_Comm
3976 
3977    Input Parameters:
3978 +  comm - MPI communicator
3979 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3980            This value should be the same as the local size used in creating the
3981            y vector for the matrix-vector product y = Ax.
3982 .  n - This value should be the same as the local size used in creating the
3983        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3984        calculated if N is given) For square matrices n is almost always m.
3985 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3986 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3987 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3988            (same value is used for all local rows)
3989 .  d_nnz - array containing the number of nonzeros in the various rows of the
3990            DIAGONAL portion of the local submatrix (possibly different for each row)
3991            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3992            The size of this array is equal to the number of local rows, i.e 'm'.
3993            You must leave room for the diagonal entry even if it is zero.
3994 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3995            submatrix (same value is used for all local rows).
3996 -  o_nnz - array containing the number of nonzeros in the various rows of the
3997            OFF-DIAGONAL portion of the local submatrix (possibly different for
3998            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3999            structure. The size of this array is equal to the number
4000            of local rows, i.e 'm'.
4001 
4002    Output Parameter:
4003 .  A - the matrix
4004 
4005    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4006    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4007    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
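
   For example, a sketch of the recommended call sequence for a parallel AIJ matrix (error checking omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve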
4008 
4009    Notes:
4010    If the *_nnz parameter is given then the *_nz parameter is ignored
4011 
4012    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4013    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4014    storage requirements for this matrix.
4015 
4016    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4017    processor then it must be used on all processors that share the object for
4018    that argument.
4019 
4020    The user MUST specify either the local or global matrix dimensions
4021    (possibly both).
4022 
4023    The parallel matrix is partitioned across processors such that the
4024    first m0 rows belong to process 0, the next m1 rows belong to
4025    process 1, the next m2 rows belong to process 2, etc., where
4026    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4027    values corresponding to an [m x N] submatrix.
4028 
4029    The columns are logically partitioned with the n0 columns belonging
4030    to the 0th partition, the next n1 columns belonging to the next
4031    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4032 
4033    The DIAGONAL portion of the local submatrix on any given processor
4034    is the submatrix corresponding to the rows and columns m,n
4035    owned by the given processor, i.e., the diagonal submatrix on
4036    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4037    etc. The remaining portion of the local submatrix [m x (N-n)]
4038    constitutes the OFF-DIAGONAL portion. The example below
4039    illustrates this concept.
4040 
4041    For a square global matrix we define each processor's diagonal portion
4042    to be its local rows and the corresponding columns (a square submatrix);
4043    each processor's off-diagonal portion encompasses the remainder of the
4044    local matrix (a rectangular submatrix).
4045 
4046    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4047 
4048    When calling this routine with a single process communicator, a matrix of
4049    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4050    type of communicator, use the construction mechanism:
4051      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4052 
4053    By default, this format uses inodes (identical nodes) when possible.
4054    We search for consecutive rows with the same nonzero structure, thereby
4055    reusing matrix information to achieve increased efficiency.
4056 
4057    Options Database Keys:
4058 +  -mat_no_inode  - Do not use inodes
4059 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4060 -  -mat_aij_oneindex - Internally use indexing starting at 1
4061         rather than 0.  Note that when calling MatSetValues(),
4062         the user still MUST index entries starting at 0!
4063 
4064 
4065    Example usage:
4066 
4067    Consider the following 8x8 matrix with 34 non-zero values, which is
4068    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4069    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4070    as follows:
4071 
4072 .vb
4073             1  2  0  |  0  3  0  |  0  4
4074     Proc0   0  5  6  |  7  0  0  |  8  0
4075             9  0 10  | 11  0  0  | 12  0
4076     -------------------------------------
4077            13  0 14  | 15 16 17  |  0  0
4078     Proc1   0 18  0  | 19 20 21  |  0  0
4079             0  0  0  | 22 23  0  | 24  0
4080     -------------------------------------
4081     Proc2  25 26 27  |  0  0 28  | 29  0
4082            30  0  0  | 31 32 33  |  0 34
4083 .ve
4084 
4085    This can be represented as a collection of submatrices as:
4086 
4087 .vb
4088       A B C
4089       D E F
4090       G H I
4091 .ve
4092 
4093    Where the submatrices A,B,C are owned by proc0, D,E,F are
4094    owned by proc1, G,H,I are owned by proc2.
4095 
4096    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4098    The 'M','N' parameters are 8,8, and have the same values on all procs.
4099 
4100    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4101    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4102    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4103    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4104    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
4105    matrix, and [DF] as another SeqAIJ matrix.
4106 
4107    When d_nz, o_nz parameters are specified, d_nz storage elements are
4108    allocated for every row of the local diagonal submatrix, and o_nz
4109    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4110    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4111    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4112    In this case, the values of d_nz,o_nz are:
4113 .vb
4114      proc0 : d_nz = 2, o_nz = 2
4115      proc1 : d_nz = 3, o_nz = 2
4116      proc2 : d_nz = 1, o_nz = 4
4117 .ve
4118    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4119    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4120    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4121    34 values.
4122 
4123    When d_nnz, o_nnz parameters are specified, the storage is specified
4124    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4125    In the above case the values for d_nnz,o_nnz are:
4126 .vb
4127      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4128      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4129      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4130 .ve
4131    Here the space allocated is the sum of all the above values, i.e., 34, and
4132    hence the preallocation is perfect.
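
   As a sketch, the corresponding call on proc1 (using the d_nnz/o_nnz values above; the
   variable names are illustrative and error checking is omitted) would be:
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat      A;
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve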
4133 
4134    Level: intermediate
4135 
4136 .keywords: matrix, aij, compressed row, sparse, parallel
4137 
4138 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4139           MPIAIJ, MatCreateMPIAIJWithArrays()
4140 @*/
4141 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4142 {
4143   PetscErrorCode ierr;
4144   PetscMPIInt    size;
4145 
4146   PetscFunctionBegin;
4147   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4148   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4149   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4150   if (size > 1) {
4151     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4152     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4153   } else {
4154     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4155     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4156   }
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 #undef __FUNCT__
4161 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
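/*
   MatMPIAIJGetSeqAIJ - returns the two local SeqAIJ blocks of an MPIAIJ matrix: Ad is the
   DIAGONAL portion, Ao is the OFF-DIAGONAL portion, and colmap maps the local column indices
   of Ao to global column indices (the garray of the MPIAIJ matrix).
*/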
4162 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4163 {
4164   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4165 
4166   PetscFunctionBegin;
4167   *Ad     = a->A;
4168   *Ao     = a->B;
4169   *colmap = a->garray;
4170   PetscFunctionReturn(0);
4171 }
4172 
4173 #undef __FUNCT__
4174 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4175 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4176 {
4177   PetscErrorCode ierr;
4178   PetscInt       i;
4179   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4180 
4181   PetscFunctionBegin;
4182   if (coloring->ctype == IS_COLORING_GLOBAL) {
4183     ISColoringValue *allcolors,*colors;
4184     ISColoring      ocoloring;
4185 
4186     /* set coloring for diagonal portion */
4187     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4188 
4189     /* set coloring for off-diagonal portion */
4190     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4191     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4192     for (i=0; i<a->B->cmap->n; i++) {
4193       colors[i] = allcolors[a->garray[i]];
4194     }
4195     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4196     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4197     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4198     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4199   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4200     ISColoringValue *colors;
4201     PetscInt        *larray;
4202     ISColoring      ocoloring;
4203 
4204     /* set coloring for diagonal portion */
4205     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4206     for (i=0; i<a->A->cmap->n; i++) {
4207       larray[i] = i + A->cmap->rstart;
4208     }
4209     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4210     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4211     for (i=0; i<a->A->cmap->n; i++) {
4212       colors[i] = coloring->colors[larray[i]];
4213     }
4214     ierr = PetscFree(larray);CHKERRQ(ierr);
4215     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4216     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4217     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4218 
4219     /* set coloring for off-diagonal portion */
4220     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4221     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4222     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4223     for (i=0; i<a->B->cmap->n; i++) {
4224       colors[i] = coloring->colors[larray[i]];
4225     }
4226     ierr = PetscFree(larray);CHKERRQ(ierr);
4227     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4228     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4229     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4230   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4231 
4232   PetscFunctionReturn(0);
4233 }
4234 
4235 #if defined(PETSC_HAVE_ADIC)
4236 #undef __FUNCT__
4237 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4238 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4239 {
4240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4241   PetscErrorCode ierr;
4242 
4243   PetscFunctionBegin;
4244   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4245   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4246   PetscFunctionReturn(0);
4247 }
4248 #endif
4249 
4250 #undef __FUNCT__
4251 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4252 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4253 {
4254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4255   PetscErrorCode ierr;
4256 
4257   PetscFunctionBegin;
4258   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4259   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4260   PetscFunctionReturn(0);
4261 }
4262 
4263 #undef __FUNCT__
4264 #define __FUNCT__ "MatMergeSymbolic"
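/*
   MatMergeSymbolic - computes the preallocation information (dnz/onz) for the merged MPIAIJ
   matrix from the rows of the per-process sequential matrix inmat, and creates the (empty)
   parallel matrix outmat; the numerical values are inserted later by MatMergeNumeric().
*/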
4265 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4266 {
4267   PetscErrorCode ierr;
4268   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4269   PetscInt       *indx;
4270 
4271   PetscFunctionBegin;
4272   /* This routine will ONLY return MPIAIJ type matrix */
4273   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4274   if (n == PETSC_DECIDE){
4275     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4276   }
4277   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4278   rstart -= m;
4279 
4280   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4281   for (i=0;i<m;i++) {
4282     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4283     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4284     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4285   }
4286 
4287   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4288   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4289   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4290   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4291   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4292   PetscFunctionReturn(0);
4293 }
4294 
4295 #undef __FUNCT__
4296 #define __FUNCT__ "MatMergeNumeric"
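/*
   MatMergeNumeric - inserts the values of the per-process sequential matrix inmat into the
   parallel matrix outmat created by MatMergeSymbolic(), destroys inmat, and assembles outmat.
*/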
4297 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4298 {
4299   PetscErrorCode ierr;
4300   PetscInt       m,N,i,rstart,nnz,Ii;
4301   PetscInt       *indx;
4302   PetscScalar    *values;
4303 
4304   PetscFunctionBegin;
4305   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4306   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4307   for (i=0;i<m;i++) {
4308     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4309     Ii    = i + rstart;
4310     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4311     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4312   }
4313   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4314   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4315   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4316   PetscFunctionReturn(0);
4317 }
4318 
4319 #undef __FUNCT__
4320 #define __FUNCT__ "MatMerge"
4321 /*@
4322       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
4323                  matrices from each processor
4324 
4325     Collective on MPI_Comm
4326 
4327    Input Parameters:
4328 +    comm - the communicator the parallel matrix will live on
4329 .    inmat - the input sequential matrix (one per process)
4330 .    n - number of local columns (or PETSC_DECIDE)
4331 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4332 
4333    Output Parameter:
4334 .    outmat - the parallel matrix generated
4335 
4336     Level: advanced
4337 
4338    Notes: The number of columns of the matrix in EACH processor MUST be the same.
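
   Example usage (a minimal sketch; seqmat is assumed to be an assembled SeqAIJ matrix on each
   process, and is destroyed by the merge; error checking omitted):
.vb
   Mat parmat;
   MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);
.ve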
4339 
4340 @*/
4341 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4342 {
4343   PetscErrorCode ierr;
4344 
4345   PetscFunctionBegin;
4346   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4347   if (scall == MAT_INITIAL_MATRIX){
4348     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4349   }
4350   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4351   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4352   PetscFunctionReturn(0);
4353 }
4354 
4355 #undef __FUNCT__
4356 #define __FUNCT__ "MatFileSplit"
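/*
   MatFileSplit - copies the local rows of the parallel matrix A into a SeqAIJ matrix on each
   process and writes that matrix (in append mode) to the binary file <outfile>.<rank>.
*/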
4357 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4358 {
4359   PetscErrorCode    ierr;
4360   PetscMPIInt       rank;
4361   PetscInt          m,N,i,rstart,nnz;
4362   size_t            len;
4363   const PetscInt    *indx;
4364   PetscViewer       out;
4365   char              *name;
4366   Mat               B;
4367   const PetscScalar *values;
4368 
4369   PetscFunctionBegin;
4370   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4371   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4372   /* Should this be the type of the diagonal block of A? */
4373   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4374   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4375   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4376   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4377   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4378   for (i=0;i<m;i++) {
4379     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4380     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4381     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4382   }
4383   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4385 
4386   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4387   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4388   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4389   sprintf(name,"%s.%d",outfile,rank);
4390   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4391   ierr = PetscFree(name);CHKERRQ(ierr);
4392   ierr = MatView(B,out);CHKERRQ(ierr);
4393   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4394   ierr = MatDestroy(&B);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4399 #undef __FUNCT__
4400 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4401 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4402 {
4403   PetscErrorCode       ierr;
4404   Mat_Merge_SeqsToMPI  *merge;
4405   PetscContainer       container;
4406 
4407   PetscFunctionBegin;
4408   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4409   if (container) {
4410     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4411     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4412     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4423     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4424     ierr = PetscFree(merge);CHKERRQ(ierr);
4425     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4426   }
4427   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #include <../src/mat/utils/freespace.h>
4432 #include <petscbt.h>
4433 
4434 #undef __FUNCT__
4435 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4436 /*@C
4437       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4438                  matrices from each processor
4439 
4440     Collective on MPI_Comm
4441 
4442    Input Parameters:
4443 +    comm - the communicator the parallel matrix will live on
4444 .    seqmat - the input sequential matrix (one per process)
4445 .    m - number of local rows (or PETSC_DECIDE)
4446 .    n - number of local columns (or PETSC_DECIDE)
4447 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4448 
4449    Output Parameter:
4450 .    mpimat - the parallel matrix generated
4451 
4452     Level: advanced
4453 
4454    Notes:
4455      The dimensions of the sequential matrix in each processor MUST be the same.
4456      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4457      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4458 @*/
4459 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4460 {
4461   PetscErrorCode       ierr;
4462   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4463   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4464   PetscMPIInt          size,rank,taga,*len_s;
4465   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4466   PetscInt             proc,m;
4467   PetscInt             **buf_ri,**buf_rj;
4468   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4469   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4470   MPI_Request          *s_waits,*r_waits;
4471   MPI_Status           *status;
4472   MatScalar            *aa=a->a;
4473   MatScalar            **abuf_r,*ba_i;
4474   Mat_Merge_SeqsToMPI  *merge;
4475   PetscContainer       container;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4479 
4480   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4481   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4482 
4483   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4484   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4485 
4486   bi     = merge->bi;
4487   bj     = merge->bj;
4488   buf_ri = merge->buf_ri;
4489   buf_rj = merge->buf_rj;
4490 
4491   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4492   owners = merge->rowmap->range;
4493   len_s  = merge->len_s;
4494 
4495   /* send and recv matrix values */
4496   /*-----------------------------*/
4497   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4498   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4499 
4500   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4501   for (proc=0,k=0; proc<size; proc++){
4502     if (!len_s[proc]) continue;
4503     i = owners[proc];
4504     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4505     k++;
4506   }
4507 
4508   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4509   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4510   ierr = PetscFree(status);CHKERRQ(ierr);
4511 
4512   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4513   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4514 
4515   /* insert mat values of mpimat */
4516   /*----------------------------*/
4517   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4518   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4519 
4520   for (k=0; k<merge->nrecv; k++){
4521     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4522     nrows = *(buf_ri_k[k]);
4523     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4524     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4525   }
4526 
4527   /* set values of ba */
4528   m = merge->rowmap->n;
4529   for (i=0; i<m; i++) {
4530     arow = owners[rank] + i;
4531     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4532     bnzi = bi[i+1] - bi[i];
4533     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4534 
4535     /* add local non-zero vals of this proc's seqmat into ba */
4536     anzi = ai[arow+1] - ai[arow];
4537     aj   = a->j + ai[arow];
4538     aa   = a->a + ai[arow];
4539     nextaj = 0;
4540     for (j=0; nextaj<anzi; j++){
4541       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4542         ba_i[j] += aa[nextaj++];
4543       }
4544     }
4545 
4546     /* add received vals into ba */
4547     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4548       /* i-th row */
4549       if (i == *nextrow[k]) {
4550         anzi = *(nextai[k]+1) - *nextai[k];
4551         aj   = buf_rj[k] + *(nextai[k]);
4552         aa   = abuf_r[k] + *(nextai[k]);
4553         nextaj = 0;
4554         for (j=0; nextaj<anzi; j++){
4555           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4556             ba_i[j] += aa[nextaj++];
4557           }
4558         }
4559         nextrow[k]++; nextai[k]++;
4560       }
4561     }
4562     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4563   }
4564   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4565   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4566 
4567   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4568   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4569   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4570   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4571   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4576 
4577 #undef __FUNCT__
4578 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
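/*
   MatMerge_SeqsToMPISymbolic - determines the nonzero structure of the merged MPIAIJ matrix
   from the per-process sequential matrices, creates the (unassembled) parallel matrix, and
   attaches the communication information needed by MatMerge_SeqsToMPINumeric() to it in a
   PetscContainer composed under the name "MatMergeSeqsToMPI".
*/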
4579 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4580 {
4581   PetscErrorCode       ierr;
4582   Mat                  B_mpi;
4583   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4584   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4585   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4586   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4587   PetscInt             len,proc,*dnz,*onz;
4588   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4589   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4590   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4591   MPI_Status           *status;
4592   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4593   PetscBT              lnkbt;
4594   Mat_Merge_SeqsToMPI  *merge;
4595   PetscContainer       container;
4596 
4597   PetscFunctionBegin;
4598   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4599 
4600   /* make sure it is a PETSc comm */
4601   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4603   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4604 
4605   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4606   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4607 
4608   /* determine row ownership */
4609   /*---------------------------------------------------------*/
4610   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4612   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4613   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4614   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4615   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4616   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4617 
4618   m      = merge->rowmap->n;
4619   M      = merge->rowmap->N;
4620   owners = merge->rowmap->range;
4621 
4622   /* determine the number of messages to send, their lengths */
4623   /*---------------------------------------------------------*/
4624   len_s  = merge->len_s;
4625 
4626   len = 0;  /* length of buf_si[] */
4627   merge->nsend = 0;
4628   for (proc=0; proc<size; proc++){
4629     len_si[proc] = 0;
4630     if (proc == rank){
4631       len_s[proc] = 0;
4632     } else {
4633       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4634       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4635     }
4636     if (len_s[proc]) {
4637       merge->nsend++;
4638       nrows = 0;
4639       for (i=owners[proc]; i<owners[proc+1]; i++){
4640         if (ai[i+1] > ai[i]) nrows++;
4641       }
4642       len_si[proc] = 2*(nrows+1);
4643       len += len_si[proc];
4644     }
4645   }
4646 
4647   /* determine the number and length of messages to receive for ij-structure */
4648   /*-------------------------------------------------------------------------*/
4649   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4650   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4651 
4652   /* post the Irecv of j-structure */
4653   /*-------------------------------*/
4654   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4655   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4656 
4657   /* post the Isend of j-structure */
4658   /*--------------------------------*/
4659   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4660 
4661   for (proc=0, k=0; proc<size; proc++){
4662     if (!len_s[proc]) continue;
4663     i = owners[proc];
4664     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4665     k++;
4666   }
4667 
4668   /* receives and sends of j-structure are complete */
4669   /*------------------------------------------------*/
4670   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4671   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4672 
4673   /* send and recv i-structure */
4674   /*---------------------------*/
4675   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4676   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4677 
4678   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4679   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4680   for (proc=0,k=0; proc<size; proc++){
4681     if (!len_s[proc]) continue;
4682     /* form outgoing message for i-structure:
4683          buf_si[0]:                 nrows to be sent
4684                [1:nrows]:           row index (global)
4685                [nrows+1:2*nrows+1]: i-structure index
4686     */
4687     /*-------------------------------------------*/
4688     nrows = len_si[proc]/2 - 1;
4689     buf_si_i    = buf_si + nrows+1;
4690     buf_si[0]   = nrows;
4691     buf_si_i[0] = 0;
4692     nrows = 0;
4693     for (i=owners[proc]; i<owners[proc+1]; i++){
4694       anzi = ai[i+1] - ai[i];
4695       if (anzi) {
4696         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4697         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4698         nrows++;
4699       }
4700     }
4701     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4702     k++;
4703     buf_si += len_si[proc];
4704   }
4705 
4706   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4707   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4708 
4709   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4710   for (i=0; i<merge->nrecv; i++){
4711     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4712   }
4713 
4714   ierr = PetscFree(len_si);CHKERRQ(ierr);
4715   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4716   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4717   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4718   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4719   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4720   ierr = PetscFree(status);CHKERRQ(ierr);
4721 
4722   /* compute a local seq matrix in each processor */
4723   /*----------------------------------------------*/
4724   /* allocate bi array and free space for accumulating nonzero column info */
4725   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4726   bi[0] = 0;
4727 
4728   /* create and initialize a linked list */
4729   nlnk = N+1;
4730   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4731 
4732   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4733   len = 0;
4734   len  = ai[owners[rank+1]] - ai[owners[rank]];
4735   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4736   current_space = free_space;
4737 
4738   /* determine symbolic info for each local row */
4739   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4740 
4741   for (k=0; k<merge->nrecv; k++){
4742     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4743     nrows = *buf_ri_k[k];
4744     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4745     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4746   }
4747 
4748   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4749   len = 0;
4750   for (i=0;i<m;i++) {
4751     bnzi   = 0;
4752     /* add local non-zero cols of this proc's seqmat into lnk */
4753     arow   = owners[rank] + i;
4754     anzi   = ai[arow+1] - ai[arow];
4755     aj     = a->j + ai[arow];
4756     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4757     bnzi += nlnk;
4758     /* add received col data into lnk */
4759     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4760       if (i == *nextrow[k]) { /* i-th row */
4761         anzi = *(nextai[k]+1) - *nextai[k];
4762         aj   = buf_rj[k] + *nextai[k];
4763         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4764         bnzi += nlnk;
4765         nextrow[k]++; nextai[k]++;
4766       }
4767     }
4768     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4769 
4770     /* if free space is not available, make more free space */
4771     if (current_space->local_remaining<bnzi) {
4772       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4773       nspacedouble++;
4774     }
4775     /* copy data into free space, then initialize lnk */
4776     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4777     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4778 
4779     current_space->array           += bnzi;
4780     current_space->local_used      += bnzi;
4781     current_space->local_remaining -= bnzi;
4782 
4783     bi[i+1] = bi[i] + bnzi;
4784   }
4785 
4786   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4787 
4788   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4789   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4790   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4791 
4792   /* create symbolic parallel matrix B_mpi */
4793   /*---------------------------------------*/
4794   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4795   if (n==PETSC_DECIDE) {
4796     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4797   } else {
4798     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4799   }
4800   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4801   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4802   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4803 
4804   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4805   B_mpi->assembled     = PETSC_FALSE;
4806   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4807   merge->bi            = bi;
4808   merge->bj            = bj;
4809   merge->buf_ri        = buf_ri;
4810   merge->buf_rj        = buf_rj;
4811   merge->coi           = PETSC_NULL;
4812   merge->coj           = PETSC_NULL;
4813   merge->owners_co     = PETSC_NULL;
4814 
4815   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4816 
4817   /* attach the supporting struct to B_mpi for reuse */
4818   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4819   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4820   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4821   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4822   *mpimat = B_mpi;
4823 
4824   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4825   PetscFunctionReturn(0);
4826 }
4827 
4828 #undef __FUNCT__
4829 #define __FUNCT__ "MatMerge_SeqsToMPI"
4830 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4831 {
4832   PetscErrorCode   ierr;
4833 
4834   PetscFunctionBegin;
4835   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4836   if (scall == MAT_INITIAL_MATRIX){
4837     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4838   }
4839   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4840   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4841   PetscFunctionReturn(0);
4842 }
4843 
4844 #undef __FUNCT__
4845 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4846 /*@
4847      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4848           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4849           with MatGetSize().
4850 
4851     Not Collective
4852 
4853    Input Parameters:
4854 +    A - the matrix
4855 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4856 
4857    Output Parameter:
4858 .    A_loc - the local sequential matrix generated
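
   Example usage (a minimal sketch; error checking omitted):
.vb
   Mat Aloc;
   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
   /* ... use Aloc ... */
   MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);   /* refill Aloc after the values of A change */
.ve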
4859 
4860     Level: developer
4861 
4862 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4863 
4864 @*/
4865 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4866 {
4867   PetscErrorCode  ierr;
4868   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4869   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4870   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4871   MatScalar       *aa=a->a,*ba=b->a,*cam;
4872   PetscScalar     *ca;
4873   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4874   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4875   PetscBool       match;
4876 
4877   PetscFunctionBegin;
4878   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4879   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4880   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4881   if (scall == MAT_INITIAL_MATRIX){
4882     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4883     ci[0] = 0;
4884     for (i=0; i<am; i++){
4885       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4886     }
4887     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4888     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4889     k = 0;
4890     for (i=0; i<am; i++) {
4891       ncols_o = bi[i+1] - bi[i];
4892       ncols_d = ai[i+1] - ai[i];
4893       /* off-diagonal portion of A */
4894       for (jo=0; jo<ncols_o; jo++) {
4895         col = cmap[*bj];
4896         if (col >= cstart) break;
4897         cj[k]   = col; bj++;
4898         ca[k++] = *ba++;
4899       }
4900       /* diagonal portion of A */
4901       for (j=0; j<ncols_d; j++) {
4902         cj[k]   = cstart + *aj++;
4903         ca[k++] = *aa++;
4904       }
4905       /* off-diagonal portion of A */
4906       for (j=jo; j<ncols_o; j++) {
4907         cj[k]   = cmap[*bj++];
4908         ca[k++] = *ba++;
4909       }
4910     }
4911     /* put together the new matrix */
4912     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4913     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4914     /* Since these are PETSc arrays, change flags to free them as necessary. */
4915     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4916     mat->free_a  = PETSC_TRUE;
4917     mat->free_ij = PETSC_TRUE;
4918     mat->nonew   = 0;
4919   } else if (scall == MAT_REUSE_MATRIX){
4920     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4921     ci = mat->i; cj = mat->j; cam = mat->a;
4922     for (i=0; i<am; i++) {
4923       /* off-diagonal portion of A */
4924       ncols_o = bi[i+1] - bi[i];
4925       for (jo=0; jo<ncols_o; jo++) {
4926         col = cmap[*bj];
4927         if (col >= cstart) break;
4928         *cam++ = *ba++; bj++;
4929       }
4930       /* diagonal portion of A */
4931       ncols_d = ai[i+1] - ai[i];
4932       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4933       /* off-diagonal portion of A */
4934       for (j=jo; j<ncols_o; j++) {
4935         *cam++ = *ba++; bj++;
4936       }
4937     }
4938   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4939   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4940   PetscFunctionReturn(0);
4941 }
4942 
4943 #undef __FUNCT__
4944 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4945 /*@C
4946      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4947 
4948     Not Collective
4949 
4950    Input Parameters:
4951 +    A - the matrix
4952 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4953 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4954 
4955    Output Parameter:
4956 .    A_loc - the local sequential matrix generated
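
   Example usage (a minimal sketch, passing PETSC_NULL so that all local rows and all nonzero
   columns are taken; error checking omitted):
.vb
   Mat Aloc;
   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);
.ve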
4957 
4958     Level: developer
4959 
4960 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4961 
4962 @*/
4963 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4964 {
4965   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4966   PetscErrorCode    ierr;
4967   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4968   IS                isrowa,iscola;
4969   Mat               *aloc;
4970   PetscBool       match;
4971 
4972   PetscFunctionBegin;
4973   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4974   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4975   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4976   if (!row){
4977     start = A->rmap->rstart; end = A->rmap->rend;
4978     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4979   } else {
4980     isrowa = *row;
4981   }
4982   if (!col){
4983     start = A->cmap->rstart;
4984     cmap  = a->garray;
4985     nzA   = a->A->cmap->n;
4986     nzB   = a->B->cmap->n;
4987     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4988     ncols = 0;
4989     for (i=0; i<nzB; i++) {
4990       if (cmap[i] < start) idx[ncols++] = cmap[i];
4991       else break;
4992     }
4993     imark = i;
4994     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4995     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4996     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4997   } else {
4998     iscola = *col;
4999   }
5000   if (scall != MAT_INITIAL_MATRIX){
5001     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5002     aloc[0] = *A_loc;
5003   }
5004   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5005   *A_loc = aloc[0];
5006   ierr = PetscFree(aloc);CHKERRQ(ierr);
5007   if (!row){
5008     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5009   }
5010   if (!col){
5011     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5012   }
5013   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatGetBrowsOfAcols"
5019 /*@C
5020     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5021 
5022     Collective on Mat
5023 
5024    Input Parameters:
5025 +    A,B - the matrices in mpiaij format
5026 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5027 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5028 
5029    Output Parameter:
5030 +    rowb, colb - index sets of rows and columns of B to extract
5031 -    B_seq - the sequential matrix generated
5032 
5033     Level: developer
5034 
5035 @*/
5036 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5037 {
5038   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5039   PetscErrorCode    ierr;
5040   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5041   IS                isrowb,iscolb;
5042   Mat               *bseq=PETSC_NULL;
5043 
5044   PetscFunctionBegin;
5045   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5046     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5047   }
5048   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5049 
5050   if (scall == MAT_INITIAL_MATRIX){
5051     start = A->cmap->rstart;
5052     cmap  = a->garray;
5053     nzA   = a->A->cmap->n;
5054     nzB   = a->B->cmap->n;
5055     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5056     ncols = 0;
5057     for (i=0; i<nzB; i++) {  /* row < local row index */
5058       if (cmap[i] < start) idx[ncols++] = cmap[i];
5059       else break;
5060     }
5061     imark = i;
5062     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5063     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5064     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5065     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5066   } else {
5067     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5068     isrowb = *rowb; iscolb = *colb;
5069     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5070     bseq[0] = *B_seq;
5071   }
5072   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5073   *B_seq = bseq[0];
5074   ierr = PetscFree(bseq);CHKERRQ(ierr);
5075   if (!rowb){
5076     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5077   } else {
5078     *rowb = isrowb;
5079   }
5080   if (!colb){
5081     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5082   } else {
5083     *colb = iscolb;
5084   }
5085   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5086   PetscFunctionReturn(0);
5087 }
5088 
5089 #undef __FUNCT__
5090 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5091 /*
5092     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5093     of the OFF-DIAGONAL portion of local A
5094 
5095     Collective on Mat
5096 
5097    Input Parameters:
5098 +    A,B - the matrices in mpiaij format
5099 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5100 
5101    Output Parameter:
5102 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5103 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5104 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5105 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5106 
5107     Level: developer
5108 
5109 */
5110 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5111 {
5112   VecScatter_MPI_General *gen_to,*gen_from;
5113   PetscErrorCode         ierr;
5114   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5115   Mat_SeqAIJ             *b_oth;
5116   VecScatter             ctx=a->Mvctx;
5117   MPI_Comm               comm=((PetscObject)ctx)->comm;
5118   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5119   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5120   PetscScalar            *rvalues,*svalues;
5121   MatScalar              *b_otha,*bufa,*bufA;
5122   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5123   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5124   MPI_Status             *sstatus,rstatus;
5125   PetscMPIInt            jj;
5126   PetscInt               *cols,sbs,rbs;
5127   PetscScalar            *vals;
5128 
5129   PetscFunctionBegin;
5130   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5131     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5132   }
5133   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5134   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5135 
5136   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5137   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5138   rvalues  = gen_from->values; /* holds the length of receiving row */
5139   svalues  = gen_to->values;   /* holds the length of sending row */
5140   nrecvs   = gen_from->n;
5141   nsends   = gen_to->n;
5142 
5143   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5144   srow     = gen_to->indices;   /* local row index to be sent */
5145   sstarts  = gen_to->starts;
5146   sprocs   = gen_to->procs;
5147   sstatus  = gen_to->sstatus;
5148   sbs      = gen_to->bs;
5149   rstarts  = gen_from->starts;
5150   rprocs   = gen_from->procs;
5151   rbs      = gen_from->bs;
5152 
5153   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5154   if (scall == MAT_INITIAL_MATRIX){
5155     /* i-array */
5156     /*---------*/
5157     /*  post receives */
5158     for (i=0; i<nrecvs; i++){
5159       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5160       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5161       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5162     }
5163 
5164     /* pack the outgoing message */
5165     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5166     sstartsj[0] = 0;  rstartsj[0] = 0;
5167     len = 0; /* total length of j or a array to be sent */
5168     k = 0;
5169     for (i=0; i<nsends; i++){
5170       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5171       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5172       for (j=0; j<nrows; j++) {
5173         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5174         for (l=0; l<sbs; l++){
5175           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5176           rowlen[j*sbs+l] = ncols;
5177           len += ncols;
5178           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5179         }
5180         k++;
5181       }
5182       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5183       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5184     }
5185     /* recvs and sends of i-array are completed */
5186     i = nrecvs;
5187     while (i--) {
5188       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5189     }
5190     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5191 
5192     /* allocate buffers for sending j and a arrays */
5193     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5194     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5195 
5196     /* create i-array of B_oth */
5197     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5198     b_othi[0] = 0;
5199     len = 0; /* total length of j or a array to be received */
5200     k = 0;
5201     for (i=0; i<nrecvs; i++){
5202       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5203       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5204       for (j=0; j<nrows; j++) {
5205         b_othi[k+1] = b_othi[k] + rowlen[j];
5206         len += rowlen[j]; k++;
5207       }
5208       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5209     }
5210 
5211     /* allocate space for the j and a arrays of B_oth */
5212     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5213     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5214 
5215     /* j-array */
5216     /*---------*/
5217     /*  post receives of j-array */
5218     for (i=0; i<nrecvs; i++){
5219       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5220       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5221     }
5222 
5223     /* pack the outgoing message j-array */
5224     k = 0;
5225     for (i=0; i<nsends; i++){
5226       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5227       bufJ = bufj+sstartsj[i];
5228       for (j=0; j<nrows; j++) {
5229         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5230         for (ll=0; ll<sbs; ll++){
5231           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5232           for (l=0; l<ncols; l++){
5233             *bufJ++ = cols[l];
5234           }
5235           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5236         }
5237       }
5238       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5239     }
5240 
5241     /* recvs and sends of j-array are completed */
5242     i = nrecvs;
5243     while (i--) {
5244       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5245     }
5246     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5247   } else if (scall == MAT_REUSE_MATRIX){
5248     sstartsj = *startsj_s;
5249     rstartsj = *startsj_r;
5250     bufa     = *bufa_ptr;
5251     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5252     b_otha   = b_oth->a;
5253   } else {
5254     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5255   }
5256 
5257   /* a-array */
5258   /*---------*/
5259   /*  post receives of a-array */
5260   for (i=0; i<nrecvs; i++){
5261     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5262     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5263   }
5264 
5265   /* pack the outgoing message a-array */
5266   k = 0;
5267   for (i=0; i<nsends; i++){
5268     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5269     bufA = bufa+sstartsj[i];
5270     for (j=0; j<nrows; j++) {
5271       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5272       for (ll=0; ll<sbs; ll++){
5273         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5274         for (l=0; l<ncols; l++){
5275           *bufA++ = vals[l];
5276         }
5277         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5278       }
5279     }
5280     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5281   }
5282   /* recvs and sends of a-array are completed */
5283   i = nrecvs;
5284   while (i--) {
5285     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5286   }
5287   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5288   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5289 
5290   if (scall == MAT_INITIAL_MATRIX){
5291     /* put together the new matrix */
5292     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5293 
5294     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5295     /* Since these are PETSc arrays, change flags to free them as necessary. */
5296     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5297     b_oth->free_a  = PETSC_TRUE;
5298     b_oth->free_ij = PETSC_TRUE;
5299     b_oth->nonew   = 0;
5300 
5301     ierr = PetscFree(bufj);CHKERRQ(ierr);
5302     if (!startsj_s || !bufa_ptr){
5303       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5304       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5305     } else {
5306       *startsj_s = sstartsj;
5307       *startsj_r = rstartsj;
5308       *bufa_ptr  = bufa;
5309     }
5310   }
5311   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5312   PetscFunctionReturn(0);
5313 }
5314 
5315 #undef __FUNCT__
5316 #define __FUNCT__ "MatGetCommunicationStructs"
5317 /*@C
5318   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5319 
5320   Not Collective
5321 
5322   Input Parameter:
5323 . A - The matrix in MATMPIAIJ format
5324 
5325   Output Parameters:
5326 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5327 . colmap - A map from global column index to local index into lvec
5328 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5329 
5330   Level: developer
5331 
5332 @*/
5333 #if defined (PETSC_USE_CTABLE)
5334 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5335 #else
5336 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5337 #endif
5338 {
5339   Mat_MPIAIJ *a;
5340 
5341   PetscFunctionBegin;
5342   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5343   PetscValidPointer(lvec, 2);
5344   PetscValidPointer(colmap, 3);
5345   PetscValidPointer(multScatter, 4);
5346   a = (Mat_MPIAIJ *) A->data;
5347   if (lvec) *lvec = a->lvec;
5348   if (colmap) *colmap = a->colmap;
5349   if (multScatter) *multScatter = a->Mvctx;
5350   PetscFunctionReturn(0);
5351 }
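/*
   A minimal usage sketch for MatGetCommunicationStructs() (illustrative names only;
   A is assumed to be an already assembled MATMPIAIJ matrix).  The returned objects
   are owned by the matrix and must not be destroyed by the caller:

     Vec            lvec;
     VecScatter     ctx;
     PetscErrorCode ierr;
   #if defined(PETSC_USE_CTABLE)
     PetscTable     cmap;
   #else
     PetscInt       *cmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&cmap,&ctx);CHKERRQ(ierr);

   lvec receives the off-process entries of x gathered by ctx during MatMult(A,x,y);
   cmap maps a global column index to the corresponding slot in lvec.
*/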
5352 
5353 EXTERN_C_BEGIN
5354 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5355 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5356 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5357 EXTERN_C_END
5358 
5359 #undef __FUNCT__
5360 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5361 /*
5362     Computes C = A*B as (B'*A')', since computing the dense-times-sparse product A*B directly is untenable
5363 
5364                n                       p                          p
5365         (              )       (              )         (                  )
5366       m (      A       )  *  n (       B      )   =   m (         C        )
5367         (              )       (              )         (                  )
5368 
5369 */
5370 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5371 {
5372   PetscErrorCode     ierr;
5373   Mat                At,Bt,Ct;
5374 
5375   PetscFunctionBegin;
5376   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5377   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5378   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5379   ierr = MatDestroy(&At);CHKERRQ(ierr);
5380   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5381   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5382   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5383   PetscFunctionReturn(0);
5384 }
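/*
   A hedged sketch of how the transpose trick above is normally reached (names A, B,
   and C are illustrative): the user calls the generic MatMatMult() with an MPIDENSE
   A and an MPIAIJ B, and PETSc dispatches to MatMatMult_MPIDense_MPIAIJ() through
   the function composed on the matrix in MatCreate_MPIAIJ() below.

     Mat            A,B,C;                A is MATMPIDENSE (m x n), B is MATMPIAIJ (n x p)
     PetscErrorCode ierr;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Internally C = (B'*A')': two MatTranspose() calls, one sparse-times-dense product,
   and a final transpose back into C.
*/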
5385 
5386 #undef __FUNCT__
5387 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5388 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5389 {
5390   PetscErrorCode ierr;
5391   PetscInt       m=A->rmap->n,n=B->cmap->n;
5392   Mat            Cmat;
5393 
5394   PetscFunctionBegin;
5395   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5396   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5397   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5398   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5399   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5400   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5401   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5402   *C   = Cmat;
5403   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5404   PetscFunctionReturn(0);
5405 }
5406 
5407 /* ----------------------------------------------------------------*/
5408 #undef __FUNCT__
5409 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5410 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5411 {
5412   PetscErrorCode ierr;
5413 
5414   PetscFunctionBegin;
5415   if (scall == MAT_INITIAL_MATRIX){
5416     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5417   }
5418   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5419   PetscFunctionReturn(0);
5420 }
5421 
5422 EXTERN_C_BEGIN
5423 #if defined(PETSC_HAVE_MUMPS)
5424 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5425 #endif
5426 #if defined(PETSC_HAVE_PASTIX)
5427 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5428 #endif
5429 #if defined(PETSC_HAVE_SUPERLU_DIST)
5430 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5431 #endif
5432 #if defined(PETSC_HAVE_SPOOLES)
5433 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5434 #endif
5435 EXTERN_C_END
5436 
5437 /*MC
5438    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5439 
5440    Options Database Keys:
5441 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5442 
5443   Level: beginner
5444 
5445 .seealso: MatCreateMPIAIJ()
5446 M*/
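/*
   A minimal usage sketch for MATMPIAIJ (sizes, preallocation counts, and names are
   illustrative assumptions, not prescriptions):

     Mat            A;
     PetscInt       i,rstart,rend,M = 100,N = 100;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Alternatively, use -mat_type mpiaij together with MatSetFromOptions(A) to select
   this type at run time.
*/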
5447 
5448 EXTERN_C_BEGIN
5449 #undef __FUNCT__
5450 #define __FUNCT__ "MatCreate_MPIAIJ"
5451 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5452 {
5453   Mat_MPIAIJ     *b;
5454   PetscErrorCode ierr;
5455   PetscMPIInt    size;
5456 
5457   PetscFunctionBegin;
5458   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5459 
5460   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5461   B->data         = (void*)b;
5462   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5463   B->rmap->bs     = 1;
5464   B->assembled    = PETSC_FALSE;
5465 
5466   B->insertmode   = NOT_SET_VALUES;
5467   b->size         = size;
5468   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5469 
5470   /* build cache for off array entries formed */
5471   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5472   b->donotstash  = PETSC_FALSE;
5473   b->colmap      = 0;
5474   b->garray      = 0;
5475   b->roworiented = PETSC_TRUE;
5476 
5477   /* stuff used for matrix vector multiply */
5478   b->lvec      = PETSC_NULL;
5479   b->Mvctx     = PETSC_NULL;
5480 
5481   /* stuff for MatGetRow() */
5482   b->rowindices   = 0;
5483   b->rowvalues    = 0;
5484   b->getrowactive = PETSC_FALSE;
5485 
5486 #if defined(PETSC_HAVE_SPOOLES)
5487   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5488                                      "MatGetFactor_mpiaij_spooles",
5489                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5490 #endif
5491 #if defined(PETSC_HAVE_MUMPS)
5492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5493                                      "MatGetFactor_aij_mumps",
5494                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5495 #endif
5496 #if defined(PETSC_HAVE_PASTIX)
5497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5498 					   "MatGetFactor_mpiaij_pastix",
5499 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5500 #endif
5501 #if defined(PETSC_HAVE_SUPERLU_DIST)
5502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5503                                      "MatGetFactor_mpiaij_superlu_dist",
5504                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5505 #endif
5506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5507                                      "MatStoreValues_MPIAIJ",
5508                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5510                                      "MatRetrieveValues_MPIAIJ",
5511                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5512   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5513 				     "MatGetDiagonalBlock_MPIAIJ",
5514                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5516 				     "MatIsTranspose_MPIAIJ",
5517 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5519 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5520 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5521   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5522 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5523 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5524   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5525 				     "MatDiagonalScaleLocal_MPIAIJ",
5526 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5528                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5529                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5531                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5532                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5534                                      "MatConvert_MPIAIJ_MPISBAIJ",
5535                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5536   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5537                                      "MatMatMult_MPIDense_MPIAIJ",
5538                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5539   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5540                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5541                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5543                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5544                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5545   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5546   PetscFunctionReturn(0);
5547 }
5548 EXTERN_C_END
5549 
5550 #undef __FUNCT__
5551 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5552 /*@
5553      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5554          and "off-diagonal" part of the matrix in CSR format.
5555 
5556    Collective on MPI_Comm
5557 
5558    Input Parameters:
5559 +  comm - MPI communicator
5560 .  m - number of local rows (Cannot be PETSC_DECIDE)
5561 .  n - number of local columns; this should be the same as the local size used in creating the
5562        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5563        calculated if N is given). For square matrices n is almost always m.
5564 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5565 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5566 .   i - row indices (CSR row pointers) for the "diagonal" portion of the matrix
5567 .   j - column indices for the "diagonal" portion, local to the diagonal block
5568 .   a - matrix values for the "diagonal" portion
5569 .   oi - row indices (CSR row pointers) for the "off-diagonal" portion of the matrix
5570 .   oj - global column indices for the "off-diagonal" portion
5571 -   oa - matrix values for the "off-diagonal" portion
5572 
5573    Output Parameter:
5574 .   mat - the matrix
5575 
5576    Level: advanced
5577 
5578    Notes:
5579        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5580        must free the arrays only after the matrix has been destroyed, and not before.
5581 
5582        The i, j, oi, and oj indices are zero-based.
5583 
5584        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5585 
5586        This sets local rows and cannot be used to set off-processor values.
5587 
5588        You cannot later use MatSetValues() to change values in this matrix.
5589 
5590 .keywords: matrix, aij, compressed row, sparse, parallel
5591 
5592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5593           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5594 @*/
5595 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5596 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5597 {
5598   PetscErrorCode ierr;
5599   Mat_MPIAIJ     *maij;
5600 
5601   PetscFunctionBegin;
5602   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5603   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5604   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5605   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5606   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5607   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5608   maij = (Mat_MPIAIJ*) (*mat)->data;
5609   maij->donotstash     = PETSC_TRUE;
5610   (*mat)->preallocated = PETSC_TRUE;
5611 
5612   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5613   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5614   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5615   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5616 
5617   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5618   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5619 
5620   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5621   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5622   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5623   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5624 
5625   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   PetscFunctionReturn(0);
5628 }
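/*
   A hedged usage sketch for MatCreateMPIAIJWithSplitArrays().  The data below is an
   assumed small example for the process owning the first 2 rows of a 4 x 4 matrix
   distributed over 2 processes; each process passes its own six arrays analogously:

     PetscInt       i[]  = {0,1,2};      CSR row pointers of the diagonal block
     PetscInt       j[]  = {0,1};        local column indices within the diagonal block
     PetscScalar    a[]  = {2.0,2.0};
     PetscInt       oi[] = {0,1,2};      CSR row pointers of the off-diagonal block
     PetscInt       oj[] = {2,3};        global column indices of the off-process entries
     PetscScalar    oa[] = {-1.0,-1.0};
     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   The six arrays must remain valid until after MatDestroy(&A), since the matrix keeps
   pointers to them rather than copying their contents.
*/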
5629 
5630 /*
5631     Special version for direct calls from Fortran
5632 */
5633 #include <private/fortranimpl.h>
5634 
5635 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5636 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5637 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5638 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5639 #endif
5640 
5641 /* Redefine these error-checking macros so they can be used in a void function: the standard versions return an error code, which is not possible here, so abort on error instead */
5642 #undef CHKERRQ
5643 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5644 #undef SETERRQ2
5645 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5646 #undef SETERRQ
5647 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5648 
5649 EXTERN_C_BEGIN
5650 #undef __FUNCT__
5651 #define __FUNCT__ "matsetvaluesmpiaij_"
5652 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5653 {
5654   Mat             mat = *mmat;
5655   PetscInt        m = *mm, n = *mn;
5656   InsertMode      addv = *maddv;
5657   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5658   PetscScalar     value;
5659   PetscErrorCode  ierr;
5660 
5661   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5662   if (mat->insertmode == NOT_SET_VALUES) {
5663     mat->insertmode = addv;
5664   }
5665 #if defined(PETSC_USE_DEBUG)
5666   else if (mat->insertmode != addv) {
5667     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5668   }
5669 #endif
5670   {
5671   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5672   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5673   PetscBool       roworiented = aij->roworiented;
5674 
5675   /* Some Variables required in the macro */
5676   Mat             A = aij->A;
5677   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5678   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5679   MatScalar       *aa = a->a;
5680   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5681   Mat             B = aij->B;
5682   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5683   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5684   MatScalar       *ba = b->a;
5685 
5686   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5687   PetscInt        nonew = a->nonew;
5688   MatScalar       *ap1,*ap2;
5689 
5690   PetscFunctionBegin;
5691   for (i=0; i<m; i++) {
5692     if (im[i] < 0) continue;
5693 #if defined(PETSC_USE_DEBUG)
5694     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5695 #endif
5696     if (im[i] >= rstart && im[i] < rend) {
5697       row      = im[i] - rstart;
5698       lastcol1 = -1;
5699       rp1      = aj + ai[row];
5700       ap1      = aa + ai[row];
5701       rmax1    = aimax[row];
5702       nrow1    = ailen[row];
5703       low1     = 0;
5704       high1    = nrow1;
5705       lastcol2 = -1;
5706       rp2      = bj + bi[row];
5707       ap2      = ba + bi[row];
5708       rmax2    = bimax[row];
5709       nrow2    = bilen[row];
5710       low2     = 0;
5711       high2    = nrow2;
5712 
5713       for (j=0; j<n; j++) {
5714         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5715         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5716         if (in[j] >= cstart && in[j] < cend){
5717           col = in[j] - cstart;
5718           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5719         } else if (in[j] < 0) continue;
5720 #if defined(PETSC_USE_DEBUG)
5721         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5722 #endif
5723         else {
5724           if (mat->was_assembled) {
5725             if (!aij->colmap) {
5726               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5727             }
5728 #if defined (PETSC_USE_CTABLE)
5729             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5730 	    col--;
5731 #else
5732             col = aij->colmap[in[j]] - 1;
5733 #endif
5734             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5735               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5736               col =  in[j];
5737               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5738               B = aij->B;
5739               b = (Mat_SeqAIJ*)B->data;
5740               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5741               rp2      = bj + bi[row];
5742               ap2      = ba + bi[row];
5743               rmax2    = bimax[row];
5744               nrow2    = bilen[row];
5745               low2     = 0;
5746               high2    = nrow2;
5747               bm       = aij->B->rmap->n;
5748               ba = b->a;
5749             }
5750           } else col = in[j];
5751           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5752         }
5753       }
5754     } else {
5755       if (!aij->donotstash) {
5756         if (roworiented) {
5757           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5758         } else {
5759           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5760         }
5761       }
5762     }
5763   }}
5764   PetscFunctionReturnVoid();
5765 }
5766 EXTERN_C_END
5767 
5768