xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a63bb30eff8b1dff6ddce5531d6230e04f6fd10b)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine number diagonal and off-diagonal counts */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine number diagonal and off-diagonal counts */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it it is not scalable (each processor
330 has an order N integer array but is fast to acess.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselfs, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal allocating
838      new memory, thus calling lots of mallocs and slowing things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of off process part of matrix zeroing removed columns*/
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually */
1136     /* added in yy until the next line, */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* values actually were received in the Begin() but we need to call this nop */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: symmetric diagonal block */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190    PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* )th processor needs space a largest processor needs */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1521 
1522     /* copy over the A part */
1523     Aloc = (Mat_SeqAIJ*)aij->A->data;
1524     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1525     row = mat->rmap->rstart;
1526     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1527     for (i=0; i<m; i++) {
1528       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1529       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1530     }
1531     aj = Aloc->j;
1532     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1533 
1534     /* copy over the B part */
1535     Aloc = (Mat_SeqAIJ*)aij->B->data;
1536     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537     row  = mat->rmap->rstart;
1538     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1539     ct   = cols;
1540     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1541     for (i=0; i<m; i++) {
1542       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1543       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1544     }
1545     ierr = PetscFree(ct);CHKERRQ(ierr);
1546     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     /*
1549        Everyone has to call to draw the matrix since the graphics waits are
1550        synchronized across all processors that share the PetscDraw object
1551     */
1552     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1553     if (!rank) {
1554       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1555       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1556       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1557       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1558     }
1559     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1560     ierr = MatDestroy(&A);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatView_MPIAIJ"
1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1568 {
1569   PetscErrorCode ierr;
1570   PetscBool      iascii,isdraw,issocket,isbinary;
1571 
1572   PetscFunctionBegin;
1573   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1577   if (iascii || isdraw || isbinary || issocket) {
1578     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1579   } else {
1580     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSOR_MPIAIJ"
1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1588 {
1589   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1590   PetscErrorCode ierr;
1591   Vec            bb1 = 0;
1592   PetscBool      hasop;
1593 
1594   PetscFunctionBegin;
1595   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1597   }
1598 
1599   if (flag == SOR_APPLY_UPPER) {
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1601     PetscFunctionReturn(0);
1602   }
1603 
1604   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1605     if (flag & SOR_ZERO_INITIAL_GUESS) {
1606       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607       its--;
1608     }
1609 
1610     while (its--) {
1611       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 
1614       /* update rhs: bb1 = bb - B*x */
1615       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1616       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1617 
1618       /* local sweep */
1619       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1620     }
1621   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626     while (its--) {
1627       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630       /* update rhs: bb1 = bb - B*x */
1631       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1632       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1633 
1634       /* local sweep */
1635       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1636     }
1637   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1638     if (flag & SOR_ZERO_INITIAL_GUESS) {
1639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1640       its--;
1641     }
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   }  else if (flag & SOR_EISENSTAT) {
1654     Vec         xx1;
1655 
1656     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1657     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1658 
1659     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (!mat->diag) {
1662       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1663       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1664     }
1665     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1666     if (hasop) {
1667       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1668     } else {
1669       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1670     }
1671     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1672 
1673     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1674 
1675     /* local sweep */
1676     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1677     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1678     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1679   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1680 
1681   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatPermute_MPIAIJ"
1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1688 {
1689   MPI_Comm       comm,pcomm;
1690   PetscInt       first,local_size,nrows;
1691   const PetscInt *rows;
1692   PetscMPIInt    size;
1693   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698   /* make a collective version of 'rowp' */
1699   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1700   if (pcomm==comm) {
1701     crowp = rowp;
1702   } else {
1703     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1704     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1705     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1706     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1707   }
1708   /* collect the global row permutation and invert it */
1709   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1710   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1711   if (pcomm!=comm) {
1712     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1713   }
1714   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1715   /* get the local target indices */
1716   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1717   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1718   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1719   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1720   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1721   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1722   /* the column permutation is so much easier;
1723      make a local version of 'colp' and invert it */
1724   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1725   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1726   if (size==1) {
1727     lcolp = colp;
1728   } else {
1729     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1730     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1731     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1732   }
1733   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1734   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1735   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1736   if (size>1) {
1737     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1738     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1739   }
1740   /* now we just get the submatrix */
1741   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1742   /* clean up */
1743   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1744   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1750 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1751 {
1752   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1753   Mat            A = mat->A,B = mat->B;
1754   PetscErrorCode ierr;
1755   PetscReal      isend[5],irecv[5];
1756 
1757   PetscFunctionBegin;
1758   info->block_size     = 1.0;
1759   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1760   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1761   isend[3] = info->memory;  isend[4] = info->mallocs;
1762   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1773     info->nz_used      = irecv[0];
1774     info->nz_allocated = irecv[1];
1775     info->nz_unneeded  = irecv[2];
1776     info->memory       = irecv[3];
1777     info->mallocs      = irecv[4];
1778   } else if (flag == MAT_GLOBAL_SUM) {
1779     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789 
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatSetOption_MPIAIJ"
1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1796 {
1797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1798   PetscErrorCode ierr;
1799 
1800   PetscFunctionBegin;
1801   switch (op) {
1802   case MAT_NEW_NONZERO_LOCATIONS:
1803   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1804   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1805   case MAT_KEEP_NONZERO_PATTERN:
1806   case MAT_NEW_NONZERO_LOCATION_ERR:
1807   case MAT_USE_INODES:
1808   case MAT_IGNORE_ZERO_ENTRIES:
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     a->roworiented = flg;
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set                         = PETSC_TRUE;
1825     A->spd                             = flg;
1826     if (flg) {
1827       A->symmetric                     = PETSC_TRUE;
1828       A->structurally_symmetric        = PETSC_TRUE;
1829       A->symmetric_set                 = PETSC_TRUE;
1830       A->structurally_symmetric_set    = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_HERMITIAN:
1840     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1841     break;
1842   case MAT_SYMMETRY_ETERNAL:
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   default:
1846     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1847   }
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatGetRow_MPIAIJ"
1853 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1854 {
1855   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1856   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1857   PetscErrorCode ierr;
1858   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1859   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1860   PetscInt       *cmap,*idx_p;
1861 
1862   PetscFunctionBegin;
1863   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1864   mat->getrowactive = PETSC_TRUE;
1865 
1866   if (!mat->rowvalues && (idx || v)) {
1867     /*
1868         allocate enough space to hold information from the longest row.
1869     */
1870     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1871     PetscInt   max = 1,tmp;
1872     for (i=0; i<matin->rmap->n; i++) {
1873       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1874       if (max < tmp) { max = tmp; }
1875     }
1876     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1877   }
1878 
1879   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1880   lrow = row - rstart;
1881 
1882   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1883   if (!v)   {pvA = 0; pvB = 0;}
1884   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1885   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1886   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1887   nztot = nzA + nzB;
1888 
1889   cmap  = mat->garray;
1890   if (v  || idx) {
1891     if (nztot) {
1892       /* Sort by increasing column numbers, assuming A and B already sorted */
1893       PetscInt imark = -1;
1894       if (v) {
1895         *v = v_p = mat->rowvalues;
1896         for (i=0; i<nzB; i++) {
1897           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1898           else break;
1899         }
1900         imark = i;
1901         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1902         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1903       }
1904       if (idx) {
1905         *idx = idx_p = mat->rowindices;
1906         if (imark > -1) {
1907           for (i=0; i<imark; i++) {
1908             idx_p[i] = cmap[cworkB[i]];
1909           }
1910         } else {
1911           for (i=0; i<nzB; i++) {
1912             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1913             else break;
1914           }
1915           imark = i;
1916         }
1917         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1918         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1919       }
1920     } else {
1921       if (idx) *idx = 0;
1922       if (v)   *v   = 0;
1923     }
1924   }
1925   *nz = nztot;
1926   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1927   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1933 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1934 {
1935   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1936 
1937   PetscFunctionBegin;
1938   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1939   aij->getrowactive = PETSC_FALSE;
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatNorm_MPIAIJ"
1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1946 {
1947   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1948   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1949   PetscErrorCode ierr;
1950   PetscInt       i,j,cstart = mat->cmap->rstart;
1951   PetscReal      sum = 0.0;
1952   MatScalar      *v;
1953 
1954   PetscFunctionBegin;
1955   if (aij->size == 1) {
1956     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1957   } else {
1958     if (type == NORM_FROBENIUS) {
1959       v = amat->a;
1960       for (i=0; i<amat->nz; i++) {
1961 #if defined(PETSC_USE_COMPLEX)
1962         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963 #else
1964         sum += (*v)*(*v); v++;
1965 #endif
1966       }
1967       v = bmat->a;
1968       for (i=0; i<bmat->nz; i++) {
1969 #if defined(PETSC_USE_COMPLEX)
1970         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1971 #else
1972         sum += (*v)*(*v); v++;
1973 #endif
1974       }
1975       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1976       *norm = PetscSqrtReal(*norm);
1977     } else if (type == NORM_1) { /* max column norm */
1978       PetscReal *tmp,*tmp2;
1979       PetscInt  *jj,*garray = aij->garray;
1980       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1981       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1982       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1983       *norm = 0.0;
1984       v = amat->a; jj = amat->j;
1985       for (j=0; j<amat->nz; j++) {
1986         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1987       }
1988       v = bmat->a; jj = bmat->j;
1989       for (j=0; j<bmat->nz; j++) {
1990         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1991       }
1992       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1993       for (j=0; j<mat->cmap->N; j++) {
1994         if (tmp2[j] > *norm) *norm = tmp2[j];
1995       }
1996       ierr = PetscFree(tmp);CHKERRQ(ierr);
1997       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1998     } else if (type == NORM_INFINITY) { /* max row norm */
1999       PetscReal ntemp = 0.0;
2000       for (j=0; j<aij->A->rmap->n; j++) {
2001         v = amat->a + amat->i[j];
2002         sum = 0.0;
2003         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2004           sum += PetscAbsScalar(*v); v++;
2005         }
2006         v = bmat->a + bmat->i[j];
2007         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2008           sum += PetscAbsScalar(*v); v++;
2009         }
2010         if (sum > ntemp) ntemp = sum;
2011       }
2012       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2013     } else {
2014       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2015     }
2016   }
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatTranspose_MPIAIJ"
2022 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2023 {
2024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2025   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2026   PetscErrorCode ierr;
2027   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2028   PetscInt       cstart=A->cmap->rstart,ncol;
2029   Mat            B;
2030   MatScalar      *array;
2031 
2032   PetscFunctionBegin;
2033   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2034 
2035   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2036   ai = Aloc->i; aj = Aloc->j;
2037   bi = Bloc->i; bj = Bloc->j;
2038   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2039     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2040     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2041     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2042     for (i=0; i<ai[ma]; i++){
2043       d_nnz[aj[i]] ++;
2044       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2045     }
2046 
2047     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2048     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2049     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2050     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2051     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2052   } else {
2053     B = *matout;
2054   }
2055 
2056   /* copy over the A part */
2057   array = Aloc->a;
2058   row = A->rmap->rstart;
2059   for (i=0; i<ma; i++) {
2060     ncol = ai[i+1]-ai[i];
2061     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2062     row++; array += ncol; aj += ncol;
2063   }
2064   aj = Aloc->j;
2065   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2066 
2067   /* copy over the B part */
2068   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2069   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2070   array = Bloc->a;
2071   row = A->rmap->rstart;
2072   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2073   cols_tmp = cols;
2074   for (i=0; i<mb; i++) {
2075     ncol = bi[i+1]-bi[i];
2076     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2077     row++; array += ncol; cols_tmp += ncol;
2078   }
2079   ierr = PetscFree(cols);CHKERRQ(ierr);
2080 
2081   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2084     *matout = B;
2085   } else {
2086     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2087   }
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2093 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2094 {
2095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2096   Mat            a = aij->A,b = aij->B;
2097   PetscErrorCode ierr;
2098   PetscInt       s1,s2,s3;
2099 
2100   PetscFunctionBegin;
2101   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2102   if (rr) {
2103     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2104     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2105     /* Overlap communication with computation. */
2106     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2107   }
2108   if (ll) {
2109     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2110     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2111     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2112   }
2113   /* scale  the diagonal block */
2114   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2115 
2116   if (rr) {
2117     /* Do a scatter end and then right scale the off-diagonal block */
2118     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2119     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2120   }
2121 
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 #undef __FUNCT__
2126 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2127 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2128 {
2129   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2134   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2135   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2136   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatEqual_MPIAIJ"
2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2154 {
2155   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2156   Mat            a,b,c,d;
2157   PetscBool      flg;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   a = matA->A; b = matA->B;
2162   c = matB->A; d = matB->B;
2163 
2164   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2165   if (flg) {
2166     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2167   }
2168   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatCopy_MPIAIJ"
2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2175 {
2176   PetscErrorCode ierr;
2177   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2178   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2179 
2180   PetscFunctionBegin;
2181   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2182   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2183     /* because of the column compression in the off-processor part of the matrix a->B,
2184        the number of columns in a->B and b->B may be different, hence we cannot call
2185        the MatCopy() directly on the two parts. If need be, we can provide a more
2186        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2187        then copying the submatrices */
2188     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2189   } else {
2190     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2191     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2192   }
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2198 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2199 {
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatAXPY_MPIAIJ"
2209 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2210 {
2211   PetscErrorCode ierr;
2212   PetscInt       i;
2213   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2214   PetscBLASInt   bnz,one=1;
2215   Mat_SeqAIJ     *x,*y;
2216 
2217   PetscFunctionBegin;
2218   if (str == SAME_NONZERO_PATTERN) {
2219     PetscScalar alpha = a;
2220     x = (Mat_SeqAIJ *)xx->A->data;
2221     y = (Mat_SeqAIJ *)yy->A->data;
2222     bnz = PetscBLASIntCast(x->nz);
2223     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224     x = (Mat_SeqAIJ *)xx->B->data;
2225     y = (Mat_SeqAIJ *)yy->B->data;
2226     bnz = PetscBLASIntCast(x->nz);
2227     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2228   } else if (str == SUBSET_NONZERO_PATTERN) {
2229     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2230 
2231     x = (Mat_SeqAIJ *)xx->B->data;
2232     y = (Mat_SeqAIJ *)yy->B->data;
2233     if (y->xtoy && y->XtoY != xx->B) {
2234       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2235       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2236     }
2237     if (!y->xtoy) { /* get xtoy */
2238       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2239       y->XtoY = xx->B;
2240       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2241     }
2242     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2243   } else {
2244     Mat B;
2245     PetscInt *nnz_d,*nnz_o;
2246     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2247     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2248     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2249     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2250     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,B);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatConjugate_MPIAIJ"
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatRealPart_MPIAIJ"
2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285 {
2286   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2291   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2298 {
2299   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2304   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #ifdef PETSC_HAVE_PBGL
2309 
2310 #include <boost/parallel/mpi/bsp_process_group.hpp>
2311 #include <boost/graph/distributed/ilu_default_graph.hpp>
2312 #include <boost/graph/distributed/ilu_0_block.hpp>
2313 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2314 #include <boost/graph/distributed/petsc/interface.hpp>
2315 #include <boost/multi_array.hpp>
2316 #include <boost/parallel/distributed_property_map->hpp>
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2320 /*
2321   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2322 */
2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2324 {
2325   namespace petsc = boost::distributed::petsc;
2326 
2327   namespace graph_dist = boost::graph::distributed;
2328   using boost::graph::distributed::ilu_default::process_group_type;
2329   using boost::graph::ilu_permuted;
2330 
2331   PetscBool       row_identity, col_identity;
2332   PetscContainer  c;
2333   PetscInt        m, n, M, N;
2334   PetscErrorCode  ierr;
2335 
2336   PetscFunctionBegin;
2337   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2338   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2339   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2340   if (!row_identity || !col_identity) {
2341     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2342   }
2343 
2344   process_group_type pg;
2345   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2346   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2347   lgraph_type&   level_graph = *lgraph_p;
2348   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2349 
2350   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2351   ilu_permuted(level_graph);
2352 
2353   /* put together the new matrix */
2354   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2355   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2356   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2357   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2358   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2359   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361 
2362   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2363   ierr = PetscContainerSetPointer(c, lgraph_p);
2364   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2365   ierr = PetscContainerDestroy(&c);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2372 {
2373   PetscFunctionBegin;
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatSolve_MPIAIJ"
2379 /*
2380   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2381 */
2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2383 {
2384   namespace graph_dist = boost::graph::distributed;
2385 
2386   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2387   lgraph_type*   lgraph_p;
2388   PetscContainer c;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2393   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2394   ierr = VecCopy(b, x);CHKERRQ(ierr);
2395 
2396   PetscScalar* array_x;
2397   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2398   PetscInt sx;
2399   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2400 
2401   PetscScalar* array_b;
2402   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2403   PetscInt sb;
2404   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2405 
2406   lgraph_type&   level_graph = *lgraph_p;
2407   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2408 
2409   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2410   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2411                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2412 
2413   typedef boost::iterator_property_map<array_ref_type::iterator,
2414                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2415   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2416                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2417 
2418   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2419 
2420   PetscFunctionReturn(0);
2421 }
2422 #endif
2423 
2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2425   PetscInt       nzlocal,nsends,nrecvs;
2426   PetscMPIInt    *send_rank,*recv_rank;
2427   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2428   PetscScalar    *sbuf_a,**rbuf_a;
2429   PetscErrorCode (*Destroy)(Mat);
2430 } Mat_Redundant;
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2435 {
2436   PetscErrorCode       ierr;
2437   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2438   PetscInt             i;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2442   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2443   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2444   for (i=0; i<redund->nrecvs; i++){
2445     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2446     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2447   }
2448   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2449   ierr = PetscFree(redund);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatDestroy_MatRedundant"
2455 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2456 {
2457   PetscErrorCode  ierr;
2458   PetscContainer  container;
2459   Mat_Redundant   *redund=PETSC_NULL;
2460 
2461   PetscFunctionBegin;
2462   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2463   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2464   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2465   A->ops->destroy = redund->Destroy;
2466   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2467   if (A->ops->destroy) {
2468     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2476 {
2477   PetscMPIInt    rank,size;
2478   MPI_Comm       comm=((PetscObject)mat)->comm;
2479   PetscErrorCode ierr;
2480   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2481   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2482   PetscInt       *rowrange=mat->rmap->range;
2483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2484   Mat            A=aij->A,B=aij->B,C=*matredundant;
2485   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2486   PetscScalar    *sbuf_a;
2487   PetscInt       nzlocal=a->nz+b->nz;
2488   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2489   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2490   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2491   MatScalar      *aworkA,*aworkB;
2492   PetscScalar    *vals;
2493   PetscMPIInt    tag1,tag2,tag3,imdex;
2494   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2495                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2496   MPI_Status     recv_status,*send_status;
2497   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2498   PetscInt       **rbuf_j=PETSC_NULL;
2499   PetscScalar    **rbuf_a=PETSC_NULL;
2500   Mat_Redundant  *redund=PETSC_NULL;
2501   PetscContainer container;
2502 
2503   PetscFunctionBegin;
2504   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2506 
2507   if (reuse == MAT_REUSE_MATRIX) {
2508     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2509     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2510     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2511     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2512     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2513     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2514     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2515     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2516 
2517     nsends    = redund->nsends;
2518     nrecvs    = redund->nrecvs;
2519     send_rank = redund->send_rank;
2520     recv_rank = redund->recv_rank;
2521     sbuf_nz   = redund->sbuf_nz;
2522     rbuf_nz   = redund->rbuf_nz;
2523     sbuf_j    = redund->sbuf_j;
2524     sbuf_a    = redund->sbuf_a;
2525     rbuf_j    = redund->rbuf_j;
2526     rbuf_a    = redund->rbuf_a;
2527   }
2528 
2529   if (reuse == MAT_INITIAL_MATRIX){
2530     PetscMPIInt  subrank,subsize;
2531     PetscInt     nleftover,np_subcomm;
2532     /* get the destination processors' id send_rank, nsends and nrecvs */
2533     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2534     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2535     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2536     np_subcomm = size/nsubcomm;
2537     nleftover  = size - nsubcomm*np_subcomm;
2538     nsends = 0; nrecvs = 0;
2539     for (i=0; i<size; i++){ /* i=rank*/
2540       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2541         send_rank[nsends] = i; nsends++;
2542         recv_rank[nrecvs++] = i;
2543       }
2544     }
2545     if (rank >= size - nleftover){/* this proc is a leftover processor */
2546       i = size-nleftover-1;
2547       j = 0;
2548       while (j < nsubcomm - nleftover){
2549         send_rank[nsends++] = i;
2550         i--; j++;
2551       }
2552     }
2553 
2554     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2555       for (i=0; i<nleftover; i++){
2556         recv_rank[nrecvs++] = size-nleftover+i;
2557       }
2558     }
2559 
2560     /* allocate sbuf_j, sbuf_a */
2561     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2562     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2563     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2564   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2565 
2566   /* copy mat's local entries into the buffers */
2567   if (reuse == MAT_INITIAL_MATRIX){
2568     rownz_max = 0;
2569     rptr = sbuf_j;
2570     cols = sbuf_j + rend-rstart + 1;
2571     vals = sbuf_a;
2572     rptr[0] = 0;
2573     for (i=0; i<rend-rstart; i++){
2574       row = i + rstart;
2575       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2576       ncols  = nzA + nzB;
2577       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2578       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2579       /* load the column indices for this row into cols */
2580       lwrite = 0;
2581       for (l=0; l<nzB; l++) {
2582         if ((ctmp = bmap[cworkB[l]]) < cstart){
2583           vals[lwrite]   = aworkB[l];
2584           cols[lwrite++] = ctmp;
2585         }
2586       }
2587       for (l=0; l<nzA; l++){
2588         vals[lwrite]   = aworkA[l];
2589         cols[lwrite++] = cstart + cworkA[l];
2590       }
2591       for (l=0; l<nzB; l++) {
2592         if ((ctmp = bmap[cworkB[l]]) >= cend){
2593           vals[lwrite]   = aworkB[l];
2594           cols[lwrite++] = ctmp;
2595         }
2596       }
2597       vals += ncols;
2598       cols += ncols;
2599       rptr[i+1] = rptr[i] + ncols;
2600       if (rownz_max < ncols) rownz_max = ncols;
2601     }
2602     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2603   } else { /* only copy matrix values into sbuf_a */
2604     rptr = sbuf_j;
2605     vals = sbuf_a;
2606     rptr[0] = 0;
2607     for (i=0; i<rend-rstart; i++){
2608       row = i + rstart;
2609       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2610       ncols  = nzA + nzB;
2611       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2612       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2613       lwrite = 0;
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2616       }
2617       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2618       for (l=0; l<nzB; l++) {
2619         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2620       }
2621       vals += ncols;
2622       rptr[i+1] = rptr[i] + ncols;
2623     }
2624   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2625 
2626   /* send nzlocal to others, and recv other's nzlocal */
2627   /*--------------------------------------------------*/
2628   if (reuse == MAT_INITIAL_MATRIX){
2629     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2630     s_waits2 = s_waits3 + nsends;
2631     s_waits1 = s_waits2 + nsends;
2632     r_waits1 = s_waits1 + nsends;
2633     r_waits2 = r_waits1 + nrecvs;
2634     r_waits3 = r_waits2 + nrecvs;
2635   } else {
2636     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2637     r_waits3 = s_waits3 + nsends;
2638   }
2639 
2640   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2641   if (reuse == MAT_INITIAL_MATRIX){
2642     /* get new tags to keep the communication clean */
2643     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2644     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2645     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2646 
2647     /* post receives of other's nzlocal */
2648     for (i=0; i<nrecvs; i++){
2649       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2650     }
2651     /* send nzlocal to others */
2652     for (i=0; i<nsends; i++){
2653       sbuf_nz[i] = nzlocal;
2654       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2655     }
2656     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2657     count = nrecvs;
2658     while (count) {
2659       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2660       recv_rank[imdex] = recv_status.MPI_SOURCE;
2661       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2662       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2663 
2664       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2665       rbuf_nz[imdex] += i + 2;
2666       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2667       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2668       count--;
2669     }
2670     /* wait on sends of nzlocal */
2671     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2672     /* send mat->i,j to others, and recv from other's */
2673     /*------------------------------------------------*/
2674     for (i=0; i<nsends; i++){
2675       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2676       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2677     }
2678     /* wait on receives of mat->i,j */
2679     /*------------------------------*/
2680     count = nrecvs;
2681     while (count) {
2682       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2683       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684       count--;
2685     }
2686     /* wait on sends of mat->i,j */
2687     /*---------------------------*/
2688     if (nsends) {
2689       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2690     }
2691   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2692 
2693   /* post receives, send and receive mat->a */
2694   /*----------------------------------------*/
2695   for (imdex=0; imdex<nrecvs; imdex++) {
2696     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2697   }
2698   for (i=0; i<nsends; i++){
2699     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2700   }
2701   count = nrecvs;
2702   while (count) {
2703     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2704     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2705     count--;
2706   }
2707   if (nsends) {
2708     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2709   }
2710 
2711   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2712 
2713   /* create redundant matrix */
2714   /*-------------------------*/
2715   if (reuse == MAT_INITIAL_MATRIX){
2716     /* compute rownz_max for preallocation */
2717     for (imdex=0; imdex<nrecvs; imdex++){
2718       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2719       rptr = rbuf_j[imdex];
2720       for (i=0; i<j; i++){
2721         ncols = rptr[i+1] - rptr[i];
2722         if (rownz_max < ncols) rownz_max = ncols;
2723       }
2724     }
2725 
2726     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2727     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2728     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2729     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2730     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2731   } else {
2732     C = *matredundant;
2733   }
2734 
2735   /* insert local matrix entries */
2736   rptr = sbuf_j;
2737   cols = sbuf_j + rend-rstart + 1;
2738   vals = sbuf_a;
2739   for (i=0; i<rend-rstart; i++){
2740     row   = i + rstart;
2741     ncols = rptr[i+1] - rptr[i];
2742     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2743     vals += ncols;
2744     cols += ncols;
2745   }
2746   /* insert received matrix entries */
2747   for (imdex=0; imdex<nrecvs; imdex++){
2748     rstart = rowrange[recv_rank[imdex]];
2749     rend   = rowrange[recv_rank[imdex]+1];
2750     rptr = rbuf_j[imdex];
2751     cols = rbuf_j[imdex] + rend-rstart + 1;
2752     vals = rbuf_a[imdex];
2753     for (i=0; i<rend-rstart; i++){
2754       row   = i + rstart;
2755       ncols = rptr[i+1] - rptr[i];
2756       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2757       vals += ncols;
2758       cols += ncols;
2759     }
2760   }
2761   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2763   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2764   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2765   if (reuse == MAT_INITIAL_MATRIX) {
2766     PetscContainer container;
2767     *matredundant = C;
2768     /* create a supporting struct and attach it to C for reuse */
2769     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2770     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2771     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2772     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2773     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2774     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2775 
2776     redund->nzlocal = nzlocal;
2777     redund->nsends  = nsends;
2778     redund->nrecvs  = nrecvs;
2779     redund->send_rank = send_rank;
2780     redund->recv_rank = recv_rank;
2781     redund->sbuf_nz = sbuf_nz;
2782     redund->rbuf_nz = rbuf_nz;
2783     redund->sbuf_j  = sbuf_j;
2784     redund->sbuf_a  = sbuf_a;
2785     redund->rbuf_j  = rbuf_j;
2786     redund->rbuf_a  = rbuf_a;
2787 
2788     redund->Destroy = C->ops->destroy;
2789     C->ops->destroy = MatDestroy_MatRedundant;
2790   }
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2796 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2797 {
2798   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2799   PetscErrorCode ierr;
2800   PetscInt       i,*idxb = 0;
2801   PetscScalar    *va,*vb;
2802   Vec            vtmp;
2803 
2804   PetscFunctionBegin;
2805   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2806   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2807   if (idx) {
2808     for (i=0; i<A->rmap->n; i++) {
2809       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2810     }
2811   }
2812 
2813   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2814   if (idx) {
2815     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2816   }
2817   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2818   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2819 
2820   for (i=0; i<A->rmap->n; i++){
2821     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2822       va[i] = vb[i];
2823       if (idx) idx[i] = a->garray[idxb[i]];
2824     }
2825   }
2826 
2827   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2828   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2829   ierr = PetscFree(idxb);CHKERRQ(ierr);
2830   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2836 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2837 {
2838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2839   PetscErrorCode ierr;
2840   PetscInt       i,*idxb = 0;
2841   PetscScalar    *va,*vb;
2842   Vec            vtmp;
2843 
2844   PetscFunctionBegin;
2845   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2846   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2847   if (idx) {
2848     for (i=0; i<A->cmap->n; i++) {
2849       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2850     }
2851   }
2852 
2853   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2854   if (idx) {
2855     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2856   }
2857   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2858   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2859 
2860   for (i=0; i<A->rmap->n; i++){
2861     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2862       va[i] = vb[i];
2863       if (idx) idx[i] = a->garray[idxb[i]];
2864     }
2865   }
2866 
2867   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2868   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2869   ierr = PetscFree(idxb);CHKERRQ(ierr);
2870   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2876 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2877 {
2878   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2879   PetscInt       n      = A->rmap->n;
2880   PetscInt       cstart = A->cmap->rstart;
2881   PetscInt      *cmap   = mat->garray;
2882   PetscInt      *diagIdx, *offdiagIdx;
2883   Vec            diagV, offdiagV;
2884   PetscScalar   *a, *diagA, *offdiagA;
2885   PetscInt       r;
2886   PetscErrorCode ierr;
2887 
2888   PetscFunctionBegin;
2889   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2890   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2891   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2892   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2893   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2894   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2895   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2896   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2897   for(r = 0; r < n; ++r) {
2898     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2899       a[r]   = diagA[r];
2900       idx[r] = cstart + diagIdx[r];
2901     } else {
2902       a[r]   = offdiagA[r];
2903       idx[r] = cmap[offdiagIdx[r]];
2904     }
2905   }
2906   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2907   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2908   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2909   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2910   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2911   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 #undef __FUNCT__
2916 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2917 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2918 {
2919   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2920   PetscInt       n      = A->rmap->n;
2921   PetscInt       cstart = A->cmap->rstart;
2922   PetscInt      *cmap   = mat->garray;
2923   PetscInt      *diagIdx, *offdiagIdx;
2924   Vec            diagV, offdiagV;
2925   PetscScalar   *a, *diagA, *offdiagA;
2926   PetscInt       r;
2927   PetscErrorCode ierr;
2928 
2929   PetscFunctionBegin;
2930   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2932   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2933   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2934   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2935   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2936   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2937   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2938   for(r = 0; r < n; ++r) {
2939     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2940       a[r]   = diagA[r];
2941       idx[r] = cstart + diagIdx[r];
2942     } else {
2943       a[r]   = offdiagA[r];
2944       idx[r] = cmap[offdiagIdx[r]];
2945     }
2946   }
2947   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2948   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2949   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2950   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2951   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2952   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2958 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2959 {
2960   PetscErrorCode ierr;
2961   Mat            *dummy;
2962 
2963   PetscFunctionBegin;
2964   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2965   *newmat = *dummy;
2966   ierr = PetscFree(dummy);CHKERRQ(ierr);
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2974 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2975 {
2976   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2977   PetscErrorCode ierr;
2978 
2979   PetscFunctionBegin;
2980   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 
2985 /* -------------------------------------------------------------------*/
2986 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2987        MatGetRow_MPIAIJ,
2988        MatRestoreRow_MPIAIJ,
2989        MatMult_MPIAIJ,
2990 /* 4*/ MatMultAdd_MPIAIJ,
2991        MatMultTranspose_MPIAIJ,
2992        MatMultTransposeAdd_MPIAIJ,
2993 #ifdef PETSC_HAVE_PBGL
2994        MatSolve_MPIAIJ,
2995 #else
2996        0,
2997 #endif
2998        0,
2999        0,
3000 /*10*/ 0,
3001        0,
3002        0,
3003        MatSOR_MPIAIJ,
3004        MatTranspose_MPIAIJ,
3005 /*15*/ MatGetInfo_MPIAIJ,
3006        MatEqual_MPIAIJ,
3007        MatGetDiagonal_MPIAIJ,
3008        MatDiagonalScale_MPIAIJ,
3009        MatNorm_MPIAIJ,
3010 /*20*/ MatAssemblyBegin_MPIAIJ,
3011        MatAssemblyEnd_MPIAIJ,
3012        MatSetOption_MPIAIJ,
3013        MatZeroEntries_MPIAIJ,
3014 /*24*/ MatZeroRows_MPIAIJ,
3015        0,
3016 #ifdef PETSC_HAVE_PBGL
3017        0,
3018 #else
3019        0,
3020 #endif
3021        0,
3022        0,
3023 /*29*/ MatSetUpPreallocation_MPIAIJ,
3024 #ifdef PETSC_HAVE_PBGL
3025        0,
3026 #else
3027        0,
3028 #endif
3029        0,
3030        0,
3031        0,
3032 /*34*/ MatDuplicate_MPIAIJ,
3033        0,
3034        0,
3035        0,
3036        0,
3037 /*39*/ MatAXPY_MPIAIJ,
3038        MatGetSubMatrices_MPIAIJ,
3039        MatIncreaseOverlap_MPIAIJ,
3040        MatGetValues_MPIAIJ,
3041        MatCopy_MPIAIJ,
3042 /*44*/ MatGetRowMax_MPIAIJ,
3043        MatScale_MPIAIJ,
3044        0,
3045        0,
3046        MatZeroRowsColumns_MPIAIJ,
3047 /*49*/ MatSetBlockSize_MPIAIJ,
3048        0,
3049        0,
3050        0,
3051        0,
3052 /*54*/ MatFDColoringCreate_MPIAIJ,
3053        0,
3054        MatSetUnfactored_MPIAIJ,
3055        MatPermute_MPIAIJ,
3056        0,
3057 /*59*/ MatGetSubMatrix_MPIAIJ,
3058        MatDestroy_MPIAIJ,
3059        MatView_MPIAIJ,
3060        0,
3061        0,
3062 /*64*/ 0,
3063        0,
3064        0,
3065        0,
3066        0,
3067 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3068        MatGetRowMinAbs_MPIAIJ,
3069        0,
3070        MatSetColoring_MPIAIJ,
3071 #if defined(PETSC_HAVE_ADIC)
3072        MatSetValuesAdic_MPIAIJ,
3073 #else
3074        0,
3075 #endif
3076        MatSetValuesAdifor_MPIAIJ,
3077 /*75*/ MatFDColoringApply_AIJ,
3078        0,
3079        0,
3080        0,
3081        0,
3082 /*80*/ 0,
3083        0,
3084        0,
3085 /*83*/ MatLoad_MPIAIJ,
3086        0,
3087        0,
3088        0,
3089        0,
3090        0,
3091 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3092        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3093        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3094        MatPtAP_Basic,
3095        MatPtAPSymbolic_MPIAIJ,
3096 /*94*/ MatPtAPNumeric_MPIAIJ,
3097        0,
3098        0,
3099        0,
3100        0,
3101 /*99*/ 0,
3102        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3103        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3104        MatConjugate_MPIAIJ,
3105        0,
3106 /*104*/MatSetValuesRow_MPIAIJ,
3107        MatRealPart_MPIAIJ,
3108        MatImaginaryPart_MPIAIJ,
3109        0,
3110        0,
3111 /*109*/0,
3112        MatGetRedundantMatrix_MPIAIJ,
3113        MatGetRowMin_MPIAIJ,
3114        0,
3115        0,
3116 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3117        0,
3118        0,
3119        0,
3120        0,
3121 /*119*/0,
3122        0,
3123        0,
3124        0,
3125        MatGetMultiProcBlock_MPIAIJ,
3126 /*124*/MatFindNonZeroRows_MPIAIJ,
3127        MatGetColumnNorms_MPIAIJ,
3128        MatInvertBlockDiagonal_MPIAIJ,
3129        0,
3130        MatGetSubMatricesParallel_MPIAIJ,
3131 /*129*/0,
3132        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3133        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3134        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3135        0,
3136 /*134*/0,
3137        0,
3138        0,
3139        0,
3140        0
3141 };
3142 
3143 /* ----------------------------------------------------------------------------------------*/
3144 
3145 EXTERN_C_BEGIN
3146 #undef __FUNCT__
3147 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3148 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3149 {
3150   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3151   PetscErrorCode ierr;
3152 
3153   PetscFunctionBegin;
3154   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3155   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3156   PetscFunctionReturn(0);
3157 }
3158 EXTERN_C_END
3159 
3160 EXTERN_C_BEGIN
3161 #undef __FUNCT__
3162 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3163 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3164 {
3165   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3166   PetscErrorCode ierr;
3167 
3168   PetscFunctionBegin;
3169   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3170   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3171   PetscFunctionReturn(0);
3172 }
3173 EXTERN_C_END
3174 
3175 EXTERN_C_BEGIN
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3178 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3179 {
3180   Mat_MPIAIJ     *b;
3181   PetscErrorCode ierr;
3182   PetscInt       i;
3183 
3184   PetscFunctionBegin;
3185   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3186   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3187   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3188   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3189 
3190   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3191   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3192   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3193   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3194   if (d_nnz) {
3195     for (i=0; i<B->rmap->n; i++) {
3196       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3197     }
3198   }
3199   if (o_nnz) {
3200     for (i=0; i<B->rmap->n; i++) {
3201       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3202     }
3203   }
3204   b = (Mat_MPIAIJ*)B->data;
3205 
3206   if (!B->preallocated) {
3207     /* Explicitly create 2 MATSEQAIJ matrices. */
3208     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3209     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3210     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3211     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3212     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3213     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3214     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3215     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3216   }
3217 
3218   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3219   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3220   B->preallocated = PETSC_TRUE;
3221   PetscFunctionReturn(0);
3222 }
3223 EXTERN_C_END
3224 
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3227 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3228 {
3229   Mat            mat;
3230   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3231   PetscErrorCode ierr;
3232 
3233   PetscFunctionBegin;
3234   *newmat       = 0;
3235   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3236   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3237   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3238   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3239   a    = (Mat_MPIAIJ*)mat->data;
3240 
3241   mat->factortype    = matin->factortype;
3242   mat->rmap->bs      = matin->rmap->bs;
3243   mat->assembled    = PETSC_TRUE;
3244   mat->insertmode   = NOT_SET_VALUES;
3245   mat->preallocated = PETSC_TRUE;
3246 
3247   a->size           = oldmat->size;
3248   a->rank           = oldmat->rank;
3249   a->donotstash     = oldmat->donotstash;
3250   a->roworiented    = oldmat->roworiented;
3251   a->rowindices     = 0;
3252   a->rowvalues      = 0;
3253   a->getrowactive   = PETSC_FALSE;
3254 
3255   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3256   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3257 
3258   if (oldmat->colmap) {
3259 #if defined (PETSC_USE_CTABLE)
3260     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3261 #else
3262     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3263     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3264     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3265 #endif
3266   } else a->colmap = 0;
3267   if (oldmat->garray) {
3268     PetscInt len;
3269     len  = oldmat->B->cmap->n;
3270     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3271     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3272     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3273   } else a->garray = 0;
3274 
3275   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3276   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3277   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3278   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3279   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3280   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3281   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3282   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3283   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3284   *newmat = mat;
3285   PetscFunctionReturn(0);
3286 }
3287 
3288 
3289 
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatLoad_MPIAIJ"
3292 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3293 {
3294   PetscScalar    *vals,*svals;
3295   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3296   PetscErrorCode ierr;
3297   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3298   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3299   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3300   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3301   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3302   int            fd;
3303 
3304   PetscFunctionBegin;
3305   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3306   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3307   if (!rank) {
3308     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3309     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3310     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3311   }
3312 
3313   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3314 
3315   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3316   M = header[1]; N = header[2];
3317   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3318   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3319   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3320 
3321   /* If global sizes are set, check if they are consistent with that given in the file */
3322   if (sizesset) {
3323     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3324   }
3325   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3326   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3327 
3328   /* determine ownership of all rows */
3329   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3330   else m = newMat->rmap->n; /* Set by user */
3331 
3332   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3333   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3334 
3335   /* First process needs enough room for process with most rows */
3336   if (!rank) {
3337     mmax       = rowners[1];
3338     for (i=2; i<size; i++) {
3339       mmax = PetscMax(mmax,rowners[i]);
3340     }
3341   } else mmax = m;
3342 
3343   rowners[0] = 0;
3344   for (i=2; i<=size; i++) {
3345     rowners[i] += rowners[i-1];
3346   }
3347   rstart = rowners[rank];
3348   rend   = rowners[rank+1];
3349 
3350   /* distribute row lengths to all processors */
3351   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3352   if (!rank) {
3353     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3354     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3355     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3356     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3357     for (j=0; j<m; j++) {
3358       procsnz[0] += ourlens[j];
3359     }
3360     for (i=1; i<size; i++) {
3361       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3362       /* calculate the number of nonzeros on each processor */
3363       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3364         procsnz[i] += rowlengths[j];
3365       }
3366       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3367     }
3368     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3369   } else {
3370     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3371   }
3372 
3373   if (!rank) {
3374     /* determine max buffer needed and allocate it */
3375     maxnz = 0;
3376     for (i=0; i<size; i++) {
3377       maxnz = PetscMax(maxnz,procsnz[i]);
3378     }
3379     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3380 
3381     /* read in my part of the matrix column indices  */
3382     nz   = procsnz[0];
3383     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3384     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3385 
3386     /* read in every one elses and ship off */
3387     for (i=1; i<size; i++) {
3388       nz     = procsnz[i];
3389       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3390       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3391     }
3392     ierr = PetscFree(cols);CHKERRQ(ierr);
3393   } else {
3394     /* determine buffer space needed for message */
3395     nz = 0;
3396     for (i=0; i<m; i++) {
3397       nz += ourlens[i];
3398     }
3399     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3400 
3401     /* receive message of column indices*/
3402     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3403   }
3404 
3405   /* determine column ownership if matrix is not square */
3406   if (N != M) {
3407     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3408     else n = newMat->cmap->n;
3409     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3410     cstart = cend - n;
3411   } else {
3412     cstart = rstart;
3413     cend   = rend;
3414     n      = cend - cstart;
3415   }
3416 
3417   /* loop over local rows, determining number of off diagonal entries */
3418   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3419   jj = 0;
3420   for (i=0; i<m; i++) {
3421     for (j=0; j<ourlens[i]; j++) {
3422       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3423       jj++;
3424     }
3425   }
3426 
3427   for (i=0; i<m; i++) {
3428     ourlens[i] -= offlens[i];
3429   }
3430   if (!sizesset) {
3431     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3432   }
3433   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3434 
3435   for (i=0; i<m; i++) {
3436     ourlens[i] += offlens[i];
3437   }
3438 
3439   if (!rank) {
3440     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3441 
3442     /* read in my part of the matrix numerical values  */
3443     nz   = procsnz[0];
3444     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3445 
3446     /* insert into matrix */
3447     jj      = rstart;
3448     smycols = mycols;
3449     svals   = vals;
3450     for (i=0; i<m; i++) {
3451       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3452       smycols += ourlens[i];
3453       svals   += ourlens[i];
3454       jj++;
3455     }
3456 
3457     /* read in other processors and ship out */
3458     for (i=1; i<size; i++) {
3459       nz     = procsnz[i];
3460       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3461       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3462     }
3463     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3464   } else {
3465     /* receive numeric values */
3466     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3467 
3468     /* receive message of values*/
3469     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3470 
3471     /* insert into matrix */
3472     jj      = rstart;
3473     smycols = mycols;
3474     svals   = vals;
3475     for (i=0; i<m; i++) {
3476       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3477       smycols += ourlens[i];
3478       svals   += ourlens[i];
3479       jj++;
3480     }
3481   }
3482   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3483   ierr = PetscFree(vals);CHKERRQ(ierr);
3484   ierr = PetscFree(mycols);CHKERRQ(ierr);
3485   ierr = PetscFree(rowners);CHKERRQ(ierr);
3486 
3487   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3488   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3494 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3495 {
3496   PetscErrorCode ierr;
3497   IS             iscol_local;
3498   PetscInt       csize;
3499 
3500   PetscFunctionBegin;
3501   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3502   if (call == MAT_REUSE_MATRIX) {
3503     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3504     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3505   } else {
3506     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3507   }
3508   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3509   if (call == MAT_INITIAL_MATRIX) {
3510     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3511     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3512   }
3513   PetscFunctionReturn(0);
3514 }
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3518 /*
3519     Not great since it makes two copies of the submatrix, first an SeqAIJ
3520   in local and then by concatenating the local matrices the end result.
3521   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3522 
3523   Note: This requires a sequential iscol with all indices.
3524 */
3525 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3526 {
3527   PetscErrorCode ierr;
3528   PetscMPIInt    rank,size;
3529   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3530   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3531   Mat            *local,M,Mreuse;
3532   MatScalar      *vwork,*aa;
3533   MPI_Comm       comm = ((PetscObject)mat)->comm;
3534   Mat_SeqAIJ     *aij;
3535 
3536 
3537   PetscFunctionBegin;
3538   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3539   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3540 
3541   if (call ==  MAT_REUSE_MATRIX) {
3542     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3543     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3544     local = &Mreuse;
3545     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3546   } else {
3547     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3548     Mreuse = *local;
3549     ierr   = PetscFree(local);CHKERRQ(ierr);
3550   }
3551 
3552   /*
3553       m - number of local rows
3554       n - number of columns (same on all processors)
3555       rstart - first row in new global matrix generated
3556   */
3557   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3558   if (call == MAT_INITIAL_MATRIX) {
3559     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3560     ii  = aij->i;
3561     jj  = aij->j;
3562 
3563     /*
3564         Determine the number of non-zeros in the diagonal and off-diagonal
3565         portions of the matrix in order to do correct preallocation
3566     */
3567 
3568     /* first get start and end of "diagonal" columns */
3569     if (csize == PETSC_DECIDE) {
3570       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3571       if (mglobal == n) { /* square matrix */
3572 	nlocal = m;
3573       } else {
3574         nlocal = n/size + ((n % size) > rank);
3575       }
3576     } else {
3577       nlocal = csize;
3578     }
3579     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3580     rstart = rend - nlocal;
3581     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3582 
3583     /* next, compute all the lengths */
3584     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3585     olens = dlens + m;
3586     for (i=0; i<m; i++) {
3587       jend = ii[i+1] - ii[i];
3588       olen = 0;
3589       dlen = 0;
3590       for (j=0; j<jend; j++) {
3591         if (*jj < rstart || *jj >= rend) olen++;
3592         else dlen++;
3593         jj++;
3594       }
3595       olens[i] = olen;
3596       dlens[i] = dlen;
3597     }
3598     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3599     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3600     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3601     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3602     ierr = PetscFree(dlens);CHKERRQ(ierr);
3603   } else {
3604     PetscInt ml,nl;
3605 
3606     M = *newmat;
3607     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3608     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3609     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3610     /*
3611          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3612        rather than the slower MatSetValues().
3613     */
3614     M->was_assembled = PETSC_TRUE;
3615     M->assembled     = PETSC_FALSE;
3616   }
3617   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3618   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3619   ii  = aij->i;
3620   jj  = aij->j;
3621   aa  = aij->a;
3622   for (i=0; i<m; i++) {
3623     row   = rstart + i;
3624     nz    = ii[i+1] - ii[i];
3625     cwork = jj;     jj += nz;
3626     vwork = aa;     aa += nz;
3627     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3628   }
3629 
3630   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   *newmat = M;
3633 
3634   /* save submatrix used in processor for next request */
3635   if (call ==  MAT_INITIAL_MATRIX) {
3636     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3637     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3638   }
3639 
3640   PetscFunctionReturn(0);
3641 }
3642 
3643 EXTERN_C_BEGIN
3644 #undef __FUNCT__
3645 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3646 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3647 {
3648   PetscInt       m,cstart, cend,j,nnz,i,d;
3649   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3650   const PetscInt *JJ;
3651   PetscScalar    *values;
3652   PetscErrorCode ierr;
3653 
3654   PetscFunctionBegin;
3655   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3656 
3657   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3658   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3659   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3660   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3661   m      = B->rmap->n;
3662   cstart = B->cmap->rstart;
3663   cend   = B->cmap->rend;
3664   rstart = B->rmap->rstart;
3665 
3666   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3667 
3668 #if defined(PETSC_USE_DEBUGGING)
3669   for (i=0; i<m; i++) {
3670     nnz     = Ii[i+1]- Ii[i];
3671     JJ      = J + Ii[i];
3672     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3673     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3674     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3675   }
3676 #endif
3677 
3678   for (i=0; i<m; i++) {
3679     nnz     = Ii[i+1]- Ii[i];
3680     JJ      = J + Ii[i];
3681     nnz_max = PetscMax(nnz_max,nnz);
3682     d       = 0;
3683     for (j=0; j<nnz; j++) {
3684       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3685     }
3686     d_nnz[i] = d;
3687     o_nnz[i] = nnz - d;
3688   }
3689   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3690   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3691 
3692   if (v) values = (PetscScalar*)v;
3693   else {
3694     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3695     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3696   }
3697 
3698   for (i=0; i<m; i++) {
3699     ii   = i + rstart;
3700     nnz  = Ii[i+1]- Ii[i];
3701     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3702   }
3703   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3704   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3705 
3706   if (!v) {
3707     ierr = PetscFree(values);CHKERRQ(ierr);
3708   }
3709   PetscFunctionReturn(0);
3710 }
3711 EXTERN_C_END
3712 
3713 #undef __FUNCT__
3714 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3715 /*@
3716    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3717    (the default parallel PETSc format).
3718 
3719    Collective on MPI_Comm
3720 
3721    Input Parameters:
3722 +  B - the matrix
3723 .  i - the indices into j for the start of each local row (starts with zero)
3724 .  j - the column indices for each local row (starts with zero)
3725 -  v - optional values in the matrix
3726 
3727    Level: developer
3728 
3729    Notes:
3730        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3731      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3732      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3733 
3734        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3735 
3736        The format which is used for the sparse matrix input, is equivalent to a
3737     row-major ordering.. i.e for the following matrix, the input data expected is
3738     as shown:
3739 
3740         1 0 0
3741         2 0 3     P0
3742        -------
3743         4 5 6     P1
3744 
3745      Process0 [P0]: rows_owned=[0,1]
3746         i =  {0,1,3}  [size = nrow+1  = 2+1]
3747         j =  {0,0,2}  [size = nz = 6]
3748         v =  {1,2,3}  [size = nz = 6]
3749 
3750      Process1 [P1]: rows_owned=[2]
3751         i =  {0,3}    [size = nrow+1  = 1+1]
3752         j =  {0,1,2}  [size = nz = 6]
3753         v =  {4,5,6}  [size = nz = 6]
3754 
3755 .keywords: matrix, aij, compressed row, sparse, parallel
3756 
3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3758           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3759 @*/
3760 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3761 {
3762   PetscErrorCode ierr;
3763 
3764   PetscFunctionBegin;
3765   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3771 /*@C
3772    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3773    (the default parallel PETSc format).  For good matrix assembly performance
3774    the user should preallocate the matrix storage by setting the parameters
3775    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3776    performance can be increased by more than a factor of 50.
3777 
3778    Collective on MPI_Comm
3779 
3780    Input Parameters:
3781 +  A - the matrix
3782 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3783            (same value is used for all local rows)
3784 .  d_nnz - array containing the number of nonzeros in the various rows of the
3785            DIAGONAL portion of the local submatrix (possibly different for each row)
3786            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3787            The size of this array is equal to the number of local rows, i.e 'm'.
3788            You must leave room for the diagonal entry even if it is zero.
3789 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3790            submatrix (same value is used for all local rows).
3791 -  o_nnz - array containing the number of nonzeros in the various rows of the
3792            OFF-DIAGONAL portion of the local submatrix (possibly different for
3793            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3794            structure. The size of this array is equal to the number
3795            of local rows, i.e 'm'.
3796 
3797    If the *_nnz parameter is given then the *_nz parameter is ignored
3798 
3799    The AIJ format (also called the Yale sparse matrix format or
3800    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3801    storage.  The stored row and column indices begin with zero.
3802    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3803 
3804    The parallel matrix is partitioned such that the first m0 rows belong to
3805    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3806    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3807 
3808    The DIAGONAL portion of the local submatrix of a processor can be defined
3809    as the submatrix which is obtained by extraction the part corresponding to
3810    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3811    first row that belongs to the processor, r2 is the last row belonging to
3812    the this processor, and c1-c2 is range of indices of the local part of a
3813    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3814    common case of a square matrix, the row and column ranges are the same and
3815    the DIAGONAL part is also square. The remaining portion of the local
3816    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3817 
3818    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3819 
3820    You can call MatGetInfo() to get information on how effective the preallocation was;
3821    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3822    You can also run with the option -info and look for messages with the string
3823    malloc in them to see if additional memory allocation was needed.
3824 
3825    Example usage:
3826 
3827    Consider the following 8x8 matrix with 34 non-zero values, that is
3828    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3829    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3830    as follows:
3831 
3832 .vb
3833             1  2  0  |  0  3  0  |  0  4
3834     Proc0   0  5  6  |  7  0  0  |  8  0
3835             9  0 10  | 11  0  0  | 12  0
3836     -------------------------------------
3837            13  0 14  | 15 16 17  |  0  0
3838     Proc1   0 18  0  | 19 20 21  |  0  0
3839             0  0  0  | 22 23  0  | 24  0
3840     -------------------------------------
3841     Proc2  25 26 27  |  0  0 28  | 29  0
3842            30  0  0  | 31 32 33  |  0 34
3843 .ve
3844 
3845    This can be represented as a collection of submatrices as:
3846 
3847 .vb
3848       A B C
3849       D E F
3850       G H I
3851 .ve
3852 
3853    Where the submatrices A,B,C are owned by proc0, D,E,F are
3854    owned by proc1, G,H,I are owned by proc2.
3855 
3856    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3857    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3858    The 'M','N' parameters are 8,8, and have the same values on all procs.
3859 
3860    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3861    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3862    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3863    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3864    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3865    matrix, ans [DF] as another SeqAIJ matrix.
3866 
3867    When d_nz, o_nz parameters are specified, d_nz storage elements are
3868    allocated for every row of the local diagonal submatrix, and o_nz
3869    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3870    One way to choose d_nz and o_nz is to use the max nonzerors per local
3871    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3872    In this case, the values of d_nz,o_nz are:
3873 .vb
3874      proc0 : dnz = 2, o_nz = 2
3875      proc1 : dnz = 3, o_nz = 2
3876      proc2 : dnz = 1, o_nz = 4
3877 .ve
3878    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3879    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3880    for proc3. i.e we are using 12+15+10=37 storage locations to store
3881    34 values.
3882 
3883    When d_nnz, o_nnz parameters are specified, the storage is specified
3884    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3885    In the above case the values for d_nnz,o_nnz are:
3886 .vb
3887      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3888      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3889      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3890 .ve
3891    Here the space allocated is sum of all the above values i.e 34, and
3892    hence pre-allocation is perfect.
3893 
3894    Level: intermediate
3895 
3896 .keywords: matrix, aij, compressed row, sparse, parallel
3897 
3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3899           MPIAIJ, MatGetInfo()
3900 @*/
3901 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3902 {
3903   PetscErrorCode ierr;
3904 
3905   PetscFunctionBegin;
3906   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3907   PetscValidType(B,1);
3908   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3909   PetscFunctionReturn(0);
3910 }
3911 
3912 #undef __FUNCT__
3913 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3914 /*@
3915      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3916          CSR format the local rows.
3917 
3918    Collective on MPI_Comm
3919 
3920    Input Parameters:
3921 +  comm - MPI communicator
3922 .  m - number of local rows (Cannot be PETSC_DECIDE)
3923 .  n - This value should be the same as the local size used in creating the
3924        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3925        calculated if N is given) For square matrices n is almost always m.
3926 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3927 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3928 .   i - row indices
3929 .   j - column indices
3930 -   a - matrix values
3931 
3932    Output Parameter:
3933 .   mat - the matrix
3934 
3935    Level: intermediate
3936 
3937    Notes:
3938        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3939      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3940      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3941 
3942        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3943 
3944        The format which is used for the sparse matrix input, is equivalent to a
3945     row-major ordering.. i.e for the following matrix, the input data expected is
3946     as shown:
3947 
3948         1 0 0
3949         2 0 3     P0
3950        -------
3951         4 5 6     P1
3952 
3953      Process0 [P0]: rows_owned=[0,1]
3954         i =  {0,1,3}  [size = nrow+1  = 2+1]
3955         j =  {0,0,2}  [size = nz = 6]
3956         v =  {1,2,3}  [size = nz = 6]
3957 
3958      Process1 [P1]: rows_owned=[2]
3959         i =  {0,3}    [size = nrow+1  = 1+1]
3960         j =  {0,1,2}  [size = nz = 6]
3961         v =  {4,5,6}  [size = nz = 6]
3962 
3963 .keywords: matrix, aij, compressed row, sparse, parallel
3964 
3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3966           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3967 @*/
3968 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3969 {
3970   PetscErrorCode ierr;
3971 
3972  PetscFunctionBegin;
3973   if (i[0]) {
3974     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3975   }
3976   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3977   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3978   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3979   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3980   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3981   PetscFunctionReturn(0);
3982 }
3983 
3984 #undef __FUNCT__
3985 #define __FUNCT__ "MatCreateMPIAIJ"
3986 /*@C
3987    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3988    (the default parallel PETSc format).  For good matrix assembly performance
3989    the user should preallocate the matrix storage by setting the parameters
3990    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3991    performance can be increased by more than a factor of 50.
3992 
3993    Collective on MPI_Comm
3994 
3995    Input Parameters:
3996 +  comm - MPI communicator
3997 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3998            This value should be the same as the local size used in creating the
3999            y vector for the matrix-vector product y = Ax.
4000 .  n - This value should be the same as the local size used in creating the
4001        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4002        calculated if N is given) For square matrices n is almost always m.
4003 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4004 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4005 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4006            (same value is used for all local rows)
4007 .  d_nnz - array containing the number of nonzeros in the various rows of the
4008            DIAGONAL portion of the local submatrix (possibly different for each row)
4009            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4010            The size of this array is equal to the number of local rows, i.e 'm'.
4011            You must leave room for the diagonal entry even if it is zero.
4012 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4013            submatrix (same value is used for all local rows).
4014 -  o_nnz - array containing the number of nonzeros in the various rows of the
4015            OFF-DIAGONAL portion of the local submatrix (possibly different for
4016            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4017            structure. The size of this array is equal to the number
4018            of local rows, i.e 'm'.
4019 
4020    Output Parameter:
4021 .  A - the matrix
4022 
4023    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4024    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4025    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4026 
4027    Notes:
4028    If the *_nnz parameter is given then the *_nz parameter is ignored
4029 
4030    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4031    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4032    storage requirements for this matrix.
4033 
4034    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4035    processor than it must be used on all processors that share the object for
4036    that argument.
4037 
4038    The user MUST specify either the local or global matrix dimensions
4039    (possibly both).
4040 
4041    The parallel matrix is partitioned across processors such that the
4042    first m0 rows belong to process 0, the next m1 rows belong to
4043    process 1, the next m2 rows belong to process 2 etc.. where
4044    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4045    values corresponding to [m x N] submatrix.
4046 
4047    The columns are logically partitioned with the n0 columns belonging
4048    to 0th partition, the next n1 columns belonging to the next
4049    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4050 
4051    The DIAGONAL portion of the local submatrix on any given processor
4052    is the submatrix corresponding to the rows and columns m,n
4053    corresponding to the given processor. i.e diagonal matrix on
4054    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4055    etc. The remaining portion of the local submatrix [m x (N-n)]
4056    constitute the OFF-DIAGONAL portion. The example below better
4057    illustrates this concept.
4058 
4059    For a square global matrix we define each processor's diagonal portion
4060    to be its local rows and the corresponding columns (a square submatrix);
4061    each processor's off-diagonal portion encompasses the remainder of the
4062    local matrix (a rectangular submatrix).
4063 
4064    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4065 
4066    When calling this routine with a single process communicator, a matrix of
4067    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4068    type of communicator, use the construction mechanism:
4069      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4070 
4071    By default, this format uses inodes (identical nodes) when possible.
4072    We search for consecutive rows with the same nonzero structure, thereby
4073    reusing matrix information to achieve increased efficiency.
4074 
4075    Options Database Keys:
4076 +  -mat_no_inode  - Do not use inodes
4077 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4078 -  -mat_aij_oneindex - Internally use indexing starting at 1
4079         rather than 0.  Note that when calling MatSetValues(),
4080         the user still MUST index entries starting at 0!
4081 
4082 
4083    Example usage:
4084 
4085    Consider the following 8x8 matrix with 34 non-zero values, that is
4086    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4087    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4088    as follows:
4089 
4090 .vb
4091             1  2  0  |  0  3  0  |  0  4
4092     Proc0   0  5  6  |  7  0  0  |  8  0
4093             9  0 10  | 11  0  0  | 12  0
4094     -------------------------------------
4095            13  0 14  | 15 16 17  |  0  0
4096     Proc1   0 18  0  | 19 20 21  |  0  0
4097             0  0  0  | 22 23  0  | 24  0
4098     -------------------------------------
4099     Proc2  25 26 27  |  0  0 28  | 29  0
4100            30  0  0  | 31 32 33  |  0 34
4101 .ve
4102 
4103    This can be represented as a collection of submatrices as:
4104 
4105 .vb
4106       A B C
4107       D E F
4108       G H I
4109 .ve
4110 
4111    Where the submatrices A,B,C are owned by proc0, D,E,F are
4112    owned by proc1, G,H,I are owned by proc2.
4113 
4114    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4115    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4116    The 'M','N' parameters are 8,8, and have the same values on all procs.
4117 
4118    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4119    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4120    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4121    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4122    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4123    matrix, ans [DF] as another SeqAIJ matrix.
4124 
4125    When d_nz, o_nz parameters are specified, d_nz storage elements are
4126    allocated for every row of the local diagonal submatrix, and o_nz
4127    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4128    One way to choose d_nz and o_nz is to use the max nonzerors per local
4129    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4130    In this case, the values of d_nz,o_nz are:
4131 .vb
4132      proc0 : dnz = 2, o_nz = 2
4133      proc1 : dnz = 3, o_nz = 2
4134      proc2 : dnz = 1, o_nz = 4
4135 .ve
4136    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4137    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4138    for proc3. i.e we are using 12+15+10=37 storage locations to store
4139    34 values.
4140 
4141    When d_nnz, o_nnz parameters are specified, the storage is specified
4142    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4143    In the above case the values for d_nnz,o_nnz are:
4144 .vb
4145      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4146      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4147      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4148 .ve
4149    Here the space allocated is sum of all the above values i.e 34, and
4150    hence pre-allocation is perfect.
4151 
4152    Level: intermediate
4153 
4154 .keywords: matrix, aij, compressed row, sparse, parallel
4155 
4156 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4157           MPIAIJ, MatCreateMPIAIJWithArrays()
4158 @*/
4159 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4160 {
4161   PetscErrorCode ierr;
4162   PetscMPIInt    size;
4163 
4164   PetscFunctionBegin;
4165   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4166   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4167   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4168   if (size > 1) {
4169     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4170     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4171   } else {
4172     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4173     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4174   }
4175   PetscFunctionReturn(0);
4176 }
4177 
4178 #undef __FUNCT__
4179 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4180 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4181 {
4182   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4183 
4184   PetscFunctionBegin;
4185   *Ad     = a->A;
4186   *Ao     = a->B;
4187   *colmap = a->garray;
4188   PetscFunctionReturn(0);
4189 }
4190 
4191 #undef __FUNCT__
4192 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4193 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4194 {
4195   PetscErrorCode ierr;
4196   PetscInt       i;
4197   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4198 
4199   PetscFunctionBegin;
4200   if (coloring->ctype == IS_COLORING_GLOBAL) {
4201     ISColoringValue *allcolors,*colors;
4202     ISColoring      ocoloring;
4203 
4204     /* set coloring for diagonal portion */
4205     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4206 
4207     /* set coloring for off-diagonal portion */
4208     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4209     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4210     for (i=0; i<a->B->cmap->n; i++) {
4211       colors[i] = allcolors[a->garray[i]];
4212     }
4213     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4214     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4215     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4216     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4217   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4218     ISColoringValue *colors;
4219     PetscInt        *larray;
4220     ISColoring      ocoloring;
4221 
4222     /* set coloring for diagonal portion */
4223     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4224     for (i=0; i<a->A->cmap->n; i++) {
4225       larray[i] = i + A->cmap->rstart;
4226     }
4227     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4228     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4229     for (i=0; i<a->A->cmap->n; i++) {
4230       colors[i] = coloring->colors[larray[i]];
4231     }
4232     ierr = PetscFree(larray);CHKERRQ(ierr);
4233     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4234     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4235     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4236 
4237     /* set coloring for off-diagonal portion */
4238     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4239     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4240     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4241     for (i=0; i<a->B->cmap->n; i++) {
4242       colors[i] = coloring->colors[larray[i]];
4243     }
4244     ierr = PetscFree(larray);CHKERRQ(ierr);
4245     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4246     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4247     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4248   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4249 
4250   PetscFunctionReturn(0);
4251 }
4252 
4253 #if defined(PETSC_HAVE_ADIC)
4254 #undef __FUNCT__
4255 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4256 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4257 {
4258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4259   PetscErrorCode ierr;
4260 
4261   PetscFunctionBegin;
4262   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4263   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4264   PetscFunctionReturn(0);
4265 }
4266 #endif
4267 
4268 #undef __FUNCT__
4269 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4270 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4271 {
4272   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4273   PetscErrorCode ierr;
4274 
4275   PetscFunctionBegin;
4276   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4277   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4278   PetscFunctionReturn(0);
4279 }
4280 
4281 #undef __FUNCT__
4282 #define __FUNCT__ "MatMergeSymbolic"
4283 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4284 {
4285   PetscErrorCode ierr;
4286   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4287   PetscInt       *indx;
4288 
4289   PetscFunctionBegin;
4290   /* This routine will ONLY return MPIAIJ type matrix */
4291   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4292   if (n == PETSC_DECIDE){
4293     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4294   }
4295   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4296   rstart -= m;
4297 
4298   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4299   for (i=0;i<m;i++) {
4300     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4301     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4302     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4303   }
4304 
4305   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4306   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4307   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4308   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4309   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 #undef __FUNCT__
4314 #define __FUNCT__ "MatMergeNumeric"
4315 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4316 {
4317   PetscErrorCode ierr;
4318   PetscInt       m,N,i,rstart,nnz,Ii;
4319   PetscInt       *indx;
4320   PetscScalar    *values;
4321 
4322   PetscFunctionBegin;
4323   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4324   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4325   for (i=0;i<m;i++) {
4326     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4327     Ii    = i + rstart;
4328     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4329     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4330   }
4331   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4332   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4333   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatMerge"
4339 /*@
4340       MatMerge - Creates a single large PETSc matrix by concatinating sequential
4341                  matrices from each processor
4342 
4343     Collective on MPI_Comm
4344 
4345    Input Parameters:
4346 +    comm - the communicators the parallel matrix will live on
4347 .    inmat - the input sequential matrices
4348 .    n - number of local columns (or PETSC_DECIDE)
4349 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4350 
4351    Output Parameter:
4352 .    outmat - the parallel matrix generated
4353 
4354     Level: advanced
4355 
4356    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4357 
4358 @*/
4359 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4360 {
4361   PetscErrorCode ierr;
4362 
4363   PetscFunctionBegin;
4364   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4365   if (scall == MAT_INITIAL_MATRIX){
4366     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4367   }
4368   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4369   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4370   PetscFunctionReturn(0);
4371 }
4372 
4373 #undef __FUNCT__
4374 #define __FUNCT__ "MatFileSplit"
4375 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4376 {
4377   PetscErrorCode    ierr;
4378   PetscMPIInt       rank;
4379   PetscInt          m,N,i,rstart,nnz;
4380   size_t            len;
4381   const PetscInt    *indx;
4382   PetscViewer       out;
4383   char              *name;
4384   Mat               B;
4385   const PetscScalar *values;
4386 
4387   PetscFunctionBegin;
4388   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4389   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4390   /* Should this be the type of the diagonal block of A? */
4391   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4392   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4393   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4394   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4395   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4396   for (i=0;i<m;i++) {
4397     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4398     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4399     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4400   }
4401   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4402   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4403 
4404   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4405   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4406   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4407   sprintf(name,"%s.%d",outfile,rank);
4408   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4409   ierr = PetscFree(name);
4410   ierr = MatView(B,out);CHKERRQ(ierr);
4411   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4412   ierr = MatDestroy(&B);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4419 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4420 {
4421   PetscErrorCode       ierr;
4422   Mat_Merge_SeqsToMPI  *merge;
4423   PetscContainer       container;
4424 
4425   PetscFunctionBegin;
4426   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4427   if (container) {
4428     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4434     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4435     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4441     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4442     ierr = PetscFree(merge);CHKERRQ(ierr);
4443     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4444   }
4445   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4446   PetscFunctionReturn(0);
4447 }
4448 
4449 #include <../src/mat/utils/freespace.h>
4450 #include <petscbt.h>
4451 
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4454 /*@C
4455       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4456                  matrices from each processor
4457 
4458     Collective on MPI_Comm
4459 
4460    Input Parameters:
4461 +    comm - the communicators the parallel matrix will live on
4462 .    seqmat - the input sequential matrices
4463 .    m - number of local rows (or PETSC_DECIDE)
4464 .    n - number of local columns (or PETSC_DECIDE)
4465 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4466 
4467    Output Parameter:
4468 .    mpimat - the parallel matrix generated
4469 
4470     Level: advanced
4471 
4472    Notes:
4473      The dimensions of the sequential matrix in each processor MUST be the same.
4474      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4475      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4476 @*/
4477 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4478 {
4479   PetscErrorCode       ierr;
4480   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4481   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4482   PetscMPIInt          size,rank,taga,*len_s;
4483   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4484   PetscInt             proc,m;
4485   PetscInt             **buf_ri,**buf_rj;
4486   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4487   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4488   MPI_Request          *s_waits,*r_waits;
4489   MPI_Status           *status;
4490   MatScalar            *aa=a->a;
4491   MatScalar            **abuf_r,*ba_i;
4492   Mat_Merge_SeqsToMPI  *merge;
4493   PetscContainer       container;
4494 
4495   PetscFunctionBegin;
4496   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4497 
4498   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4499   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4500 
4501   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4502   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4503 
4504   bi     = merge->bi;
4505   bj     = merge->bj;
4506   buf_ri = merge->buf_ri;
4507   buf_rj = merge->buf_rj;
4508 
4509   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4510   owners = merge->rowmap->range;
4511   len_s  = merge->len_s;
4512 
4513   /* send and recv matrix values */
4514   /*-----------------------------*/
4515   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4516   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4517 
4518   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4519   for (proc=0,k=0; proc<size; proc++){
4520     if (!len_s[proc]) continue;
4521     i = owners[proc];
4522     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4523     k++;
4524   }
4525 
4526   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4527   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4528   ierr = PetscFree(status);CHKERRQ(ierr);
4529 
4530   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4531   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4532 
4533   /* insert mat values of mpimat */
4534   /*----------------------------*/
4535   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4536   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4537 
4538   for (k=0; k<merge->nrecv; k++){
4539     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4540     nrows = *(buf_ri_k[k]);
4541     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4542     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4543   }
4544 
4545   /* set values of ba */
4546   m = merge->rowmap->n;
4547   for (i=0; i<m; i++) {
4548     arow = owners[rank] + i;
4549     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4550     bnzi = bi[i+1] - bi[i];
4551     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4552 
4553     /* add local non-zero vals of this proc's seqmat into ba */
4554     anzi = ai[arow+1] - ai[arow];
4555     aj   = a->j + ai[arow];
4556     aa   = a->a + ai[arow];
4557     nextaj = 0;
4558     for (j=0; nextaj<anzi; j++){
4559       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4560         ba_i[j] += aa[nextaj++];
4561       }
4562     }
4563 
4564     /* add received vals into ba */
4565     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4566       /* i-th row */
4567       if (i == *nextrow[k]) {
4568         anzi = *(nextai[k]+1) - *nextai[k];
4569         aj   = buf_rj[k] + *(nextai[k]);
4570         aa   = abuf_r[k] + *(nextai[k]);
4571         nextaj = 0;
4572         for (j=0; nextaj<anzi; j++){
4573           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4574             ba_i[j] += aa[nextaj++];
4575           }
4576         }
4577         nextrow[k]++; nextai[k]++;
4578       }
4579     }
4580     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4581   }
4582   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4583   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4584 
4585   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4586   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4587   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4588   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4589   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4590   PetscFunctionReturn(0);
4591 }
4592 
4593 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4594 
4595 #undef __FUNCT__
4596 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4597 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4598 {
4599   PetscErrorCode       ierr;
4600   Mat                  B_mpi;
4601   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4602   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4603   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4604   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4605   PetscInt             len,proc,*dnz,*onz;
4606   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4607   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4608   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4609   MPI_Status           *status;
4610   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4611   PetscBT              lnkbt;
4612   Mat_Merge_SeqsToMPI  *merge;
4613   PetscContainer       container;
4614 
4615   PetscFunctionBegin;
4616   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4617 
4618   /* make sure it is a PETSc comm */
4619   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4621   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4622 
4623   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4624   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4625 
4626   /* determine row ownership */
4627   /*---------------------------------------------------------*/
4628   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4629   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4630   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4631   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4632   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4633   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4634   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4635 
4636   m      = merge->rowmap->n;
4637   M      = merge->rowmap->N;
4638   owners = merge->rowmap->range;
4639 
4640   /* determine the number of messages to send, their lengths */
4641   /*---------------------------------------------------------*/
4642   len_s  = merge->len_s;
4643 
4644   len = 0;  /* length of buf_si[] */
4645   merge->nsend = 0;
4646   for (proc=0; proc<size; proc++){
4647     len_si[proc] = 0;
4648     if (proc == rank){
4649       len_s[proc] = 0;
4650     } else {
4651       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4652       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4653     }
4654     if (len_s[proc]) {
4655       merge->nsend++;
4656       nrows = 0;
4657       for (i=owners[proc]; i<owners[proc+1]; i++){
4658         if (ai[i+1] > ai[i]) nrows++;
4659       }
4660       len_si[proc] = 2*(nrows+1);
4661       len += len_si[proc];
4662     }
4663   }
4664 
4665   /* determine the number and length of messages to receive for ij-structure */
4666   /*-------------------------------------------------------------------------*/
4667   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4668   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4669 
4670   /* post the Irecv of j-structure */
4671   /*-------------------------------*/
4672   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4673   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4674 
4675   /* post the Isend of j-structure */
4676   /*--------------------------------*/
4677   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4678 
4679   for (proc=0, k=0; proc<size; proc++){
4680     if (!len_s[proc]) continue;
4681     i = owners[proc];
4682     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4683     k++;
4684   }
4685 
4686   /* receives and sends of j-structure are complete */
4687   /*------------------------------------------------*/
4688   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4689   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4690 
4691   /* send and recv i-structure */
4692   /*---------------------------*/
4693   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4694   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4695 
4696   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4697   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4698   for (proc=0,k=0; proc<size; proc++){
4699     if (!len_s[proc]) continue;
4700     /* form outgoing message for i-structure:
4701          buf_si[0]:                 nrows to be sent
4702                [1:nrows]:           row index (global)
4703                [nrows+1:2*nrows+1]: i-structure index
4704     */
4705     /*-------------------------------------------*/
4706     nrows = len_si[proc]/2 - 1;
4707     buf_si_i    = buf_si + nrows+1;
4708     buf_si[0]   = nrows;
4709     buf_si_i[0] = 0;
4710     nrows = 0;
4711     for (i=owners[proc]; i<owners[proc+1]; i++){
4712       anzi = ai[i+1] - ai[i];
4713       if (anzi) {
4714         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4715         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4716         nrows++;
4717       }
4718     }
4719     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4720     k++;
4721     buf_si += len_si[proc];
4722   }
4723 
4724   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4725   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4726 
4727   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4728   for (i=0; i<merge->nrecv; i++){
4729     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4730   }
4731 
4732   ierr = PetscFree(len_si);CHKERRQ(ierr);
4733   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4734   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4735   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4736   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4737   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4738   ierr = PetscFree(status);CHKERRQ(ierr);
4739 
4740   /* compute a local seq matrix in each processor */
4741   /*----------------------------------------------*/
4742   /* allocate bi array and free space for accumulating nonzero column info */
4743   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4744   bi[0] = 0;
4745 
4746   /* create and initialize a linked list */
4747   nlnk = N+1;
4748   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4749 
4750   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4751   len = 0;
4752   len  = ai[owners[rank+1]] - ai[owners[rank]];
4753   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4754   current_space = free_space;
4755 
4756   /* determine symbolic info for each local row */
4757   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4758 
4759   for (k=0; k<merge->nrecv; k++){
4760     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4761     nrows = *buf_ri_k[k];
4762     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4763     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4764   }
4765 
4766   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4767   len = 0;
4768   for (i=0;i<m;i++) {
4769     bnzi   = 0;
4770     /* add local non-zero cols of this proc's seqmat into lnk */
4771     arow   = owners[rank] + i;
4772     anzi   = ai[arow+1] - ai[arow];
4773     aj     = a->j + ai[arow];
4774     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4775     bnzi += nlnk;
4776     /* add received col data into lnk */
4777     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4778       if (i == *nextrow[k]) { /* i-th row */
4779         anzi = *(nextai[k]+1) - *nextai[k];
4780         aj   = buf_rj[k] + *nextai[k];
4781         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4782         bnzi += nlnk;
4783         nextrow[k]++; nextai[k]++;
4784       }
4785     }
4786     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4787 
4788     /* if free space is not available, make more free space */
4789     if (current_space->local_remaining<bnzi) {
4790       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4791       nspacedouble++;
4792     }
4793     /* copy data into free space, then initialize lnk */
4794     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4795     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4796 
4797     current_space->array           += bnzi;
4798     current_space->local_used      += bnzi;
4799     current_space->local_remaining -= bnzi;
4800 
4801     bi[i+1] = bi[i] + bnzi;
4802   }
4803 
4804   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4805 
4806   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4807   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4808   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4809 
4810   /* create symbolic parallel matrix B_mpi */
4811   /*---------------------------------------*/
4812   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4813   if (n==PETSC_DECIDE) {
4814     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4815   } else {
4816     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4817   }
4818   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4819   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4820   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4821 
4822   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4823   B_mpi->assembled     = PETSC_FALSE;
4824   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4825   merge->bi            = bi;
4826   merge->bj            = bj;
4827   merge->buf_ri        = buf_ri;
4828   merge->buf_rj        = buf_rj;
4829   merge->coi           = PETSC_NULL;
4830   merge->coj           = PETSC_NULL;
4831   merge->owners_co     = PETSC_NULL;
4832 
4833   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4834 
4835   /* attach the supporting struct to B_mpi for reuse */
4836   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4837   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4838   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4839   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4840   *mpimat = B_mpi;
4841 
4842   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4843   PetscFunctionReturn(0);
4844 }
4845 
4846 #undef __FUNCT__
4847 #define __FUNCT__ "MatMerge_SeqsToMPI"
4848 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4849 {
4850   PetscErrorCode   ierr;
4851 
4852   PetscFunctionBegin;
4853   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4854   if (scall == MAT_INITIAL_MATRIX){
4855     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4856   }
4857   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4858   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4859   PetscFunctionReturn(0);
4860 }
4861 
4862 #undef __FUNCT__
4863 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4864 /*@
4865      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4866           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4867           with MatGetSize()
4868 
4869     Not Collective
4870 
4871    Input Parameters:
4872 +    A - the matrix
4873 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4874 
4875    Output Parameter:
4876 .    A_loc - the local sequential matrix generated
4877 
4878     Level: developer
4879 
4880 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4881 
4882 @*/
4883 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4884 {
4885   PetscErrorCode  ierr;
4886   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4887   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4888   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4889   MatScalar       *aa=a->a,*ba=b->a,*cam;
4890   PetscScalar     *ca;
4891   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4892   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4893   PetscBool       match;
4894 
4895   PetscFunctionBegin;
4896   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4897   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4898   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4899   if (scall == MAT_INITIAL_MATRIX){
4900     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4901     ci[0] = 0;
4902     for (i=0; i<am; i++){
4903       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4904     }
4905     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4906     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4907     k = 0;
4908     for (i=0; i<am; i++) {
4909       ncols_o = bi[i+1] - bi[i];
4910       ncols_d = ai[i+1] - ai[i];
4911       /* off-diagonal portion of A */
4912       for (jo=0; jo<ncols_o; jo++) {
4913         col = cmap[*bj];
4914         if (col >= cstart) break;
4915         cj[k]   = col; bj++;
4916         ca[k++] = *ba++;
4917       }
4918       /* diagonal portion of A */
4919       for (j=0; j<ncols_d; j++) {
4920         cj[k]   = cstart + *aj++;
4921         ca[k++] = *aa++;
4922       }
4923       /* off-diagonal portion of A */
4924       for (j=jo; j<ncols_o; j++) {
4925         cj[k]   = cmap[*bj++];
4926         ca[k++] = *ba++;
4927       }
4928     }
4929     /* put together the new matrix */
4930     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4931     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4932     /* Since these are PETSc arrays, change flags to free them as necessary. */
4933     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4934     mat->free_a  = PETSC_TRUE;
4935     mat->free_ij = PETSC_TRUE;
4936     mat->nonew   = 0;
4937   } else if (scall == MAT_REUSE_MATRIX){
4938     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4939     ci = mat->i; cj = mat->j; cam = mat->a;
4940     for (i=0; i<am; i++) {
4941       /* off-diagonal portion of A */
4942       ncols_o = bi[i+1] - bi[i];
4943       for (jo=0; jo<ncols_o; jo++) {
4944         col = cmap[*bj];
4945         if (col >= cstart) break;
4946         *cam++ = *ba++; bj++;
4947       }
4948       /* diagonal portion of A */
4949       ncols_d = ai[i+1] - ai[i];
4950       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4951       /* off-diagonal portion of A */
4952       for (j=jo; j<ncols_o; j++) {
4953         *cam++ = *ba++; bj++;
4954       }
4955     }
4956   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4957   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 #undef __FUNCT__
4962 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4963 /*@C
4964      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4965 
4966     Not Collective
4967 
4968    Input Parameters:
4969 +    A - the matrix
4970 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4971 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4972 
4973    Output Parameter:
4974 .    A_loc - the local sequential matrix generated
4975 
4976     Level: developer
4977 
4978 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4979 
4980 @*/
4981 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4982 {
4983   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4984   PetscErrorCode    ierr;
4985   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4986   IS                isrowa,iscola;
4987   Mat               *aloc;
4988   PetscBool       match;
4989 
4990   PetscFunctionBegin;
4991   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4992   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4993   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4994   if (!row){
4995     start = A->rmap->rstart; end = A->rmap->rend;
4996     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4997   } else {
4998     isrowa = *row;
4999   }
5000   if (!col){
5001     start = A->cmap->rstart;
5002     cmap  = a->garray;
5003     nzA   = a->A->cmap->n;
5004     nzB   = a->B->cmap->n;
5005     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5006     ncols = 0;
5007     for (i=0; i<nzB; i++) {
5008       if (cmap[i] < start) idx[ncols++] = cmap[i];
5009       else break;
5010     }
5011     imark = i;
5012     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5013     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5014     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5015   } else {
5016     iscola = *col;
5017   }
5018   if (scall != MAT_INITIAL_MATRIX){
5019     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5020     aloc[0] = *A_loc;
5021   }
5022   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5023   *A_loc = aloc[0];
5024   ierr = PetscFree(aloc);CHKERRQ(ierr);
5025   if (!row){
5026     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5027   }
5028   if (!col){
5029     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5030   }
5031   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5032   PetscFunctionReturn(0);
5033 }
5034 
5035 #undef __FUNCT__
5036 #define __FUNCT__ "MatGetBrowsOfAcols"
5037 /*@C
5038     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5039 
5040     Collective on Mat
5041 
5042    Input Parameters:
5043 +    A,B - the matrices in mpiaij format
5044 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5045 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5046 
5047    Output Parameter:
5048 +    rowb, colb - index sets of rows and columns of B to extract
5049 -    B_seq - the sequential matrix generated
5050 
5051     Level: developer
5052 
5053 @*/
5054 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5055 {
5056   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5057   PetscErrorCode    ierr;
5058   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5059   IS                isrowb,iscolb;
5060   Mat               *bseq=PETSC_NULL;
5061 
5062   PetscFunctionBegin;
5063   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5064     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5065   }
5066   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5067 
5068   if (scall == MAT_INITIAL_MATRIX){
5069     start = A->cmap->rstart;
5070     cmap  = a->garray;
5071     nzA   = a->A->cmap->n;
5072     nzB   = a->B->cmap->n;
5073     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5074     ncols = 0;
5075     for (i=0; i<nzB; i++) {  /* row < local row index */
5076       if (cmap[i] < start) idx[ncols++] = cmap[i];
5077       else break;
5078     }
5079     imark = i;
5080     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5081     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5082     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5083     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5084   } else {
5085     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5086     isrowb = *rowb; iscolb = *colb;
5087     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5088     bseq[0] = *B_seq;
5089   }
5090   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5091   *B_seq = bseq[0];
5092   ierr = PetscFree(bseq);CHKERRQ(ierr);
5093   if (!rowb){
5094     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5095   } else {
5096     *rowb = isrowb;
5097   }
5098   if (!colb){
5099     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5100   } else {
5101     *colb = iscolb;
5102   }
5103   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5104   PetscFunctionReturn(0);
5105 }
5106 
5107 #undef __FUNCT__
5108 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5109 /*
5110     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5111     of the OFF-DIAGONAL portion of local A
5112 
5113     Collective on Mat
5114 
5115    Input Parameters:
5116 +    A,B - the matrices in mpiaij format
5117 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5118 
5119    Output Parameter:
5120 +    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5121 .    startsj_r - similar to startsj for receives
5122 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5123 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5124 
5125     Level: developer
5126 
5127 */
5128 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5129 {
5130   VecScatter_MPI_General *gen_to,*gen_from;
5131   PetscErrorCode         ierr;
5132   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5133   Mat_SeqAIJ             *b_oth;
5134   VecScatter             ctx=a->Mvctx;
5135   MPI_Comm               comm=((PetscObject)ctx)->comm;
5136   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5137   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5138   PetscScalar            *rvalues,*svalues;
5139   MatScalar              *b_otha,*bufa,*bufA;
5140   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5141   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5142   MPI_Status             *sstatus,rstatus;
5143   PetscMPIInt            jj;
5144   PetscInt               *cols,sbs,rbs;
5145   PetscScalar            *vals;
5146 
5147   PetscFunctionBegin;
5148   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5149     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5150   }
5151   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5152   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5153 
5154   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5155   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5156   rvalues  = gen_from->values; /* holds the length of receiving row */
5157   svalues  = gen_to->values;   /* holds the length of sending row */
5158   nrecvs   = gen_from->n;
5159   nsends   = gen_to->n;
5160 
5161   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5162   srow     = gen_to->indices;   /* local row index to be sent */
5163   sstarts  = gen_to->starts;
5164   sprocs   = gen_to->procs;
5165   sstatus  = gen_to->sstatus;
5166   sbs      = gen_to->bs;
5167   rstarts  = gen_from->starts;
5168   rprocs   = gen_from->procs;
5169   rbs      = gen_from->bs;
5170 
5171   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5172   if (scall == MAT_INITIAL_MATRIX){
5173     /* i-array */
5174     /*---------*/
5175     /*  post receives */
5176     for (i=0; i<nrecvs; i++){
5177       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5178       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5179       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5180     }
5181 
5182     /* pack the outgoing message */
5183     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5184     sstartsj[0] = 0;  rstartsj[0] = 0;
5185     len = 0; /* total length of j or a array to be sent */
5186     k = 0;
5187     for (i=0; i<nsends; i++){
5188       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5189       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5190       for (j=0; j<nrows; j++) {
5191         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5192         for (l=0; l<sbs; l++){
5193           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5194           rowlen[j*sbs+l] = ncols;
5195           len += ncols;
5196           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5197         }
5198         k++;
5199       }
5200       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5201       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5202     }
5203     /* recvs and sends of i-array are completed */
5204     i = nrecvs;
5205     while (i--) {
5206       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5207     }
5208     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5209 
5210     /* allocate buffers for sending j and a arrays */
5211     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5212     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5213 
5214     /* create i-array of B_oth */
5215     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5216     b_othi[0] = 0;
5217     len = 0; /* total length of j or a array to be received */
5218     k = 0;
5219     for (i=0; i<nrecvs; i++){
5220       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5221       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5222       for (j=0; j<nrows; j++) {
5223         b_othi[k+1] = b_othi[k] + rowlen[j];
5224         len += rowlen[j]; k++;
5225       }
5226       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5227     }
5228 
5229     /* allocate space for j and a arrrays of B_oth */
5230     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5231     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5232 
5233     /* j-array */
5234     /*---------*/
5235     /*  post receives of j-array */
5236     for (i=0; i<nrecvs; i++){
5237       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5238       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5239     }
5240 
5241     /* pack the outgoing message j-array */
5242     k = 0;
5243     for (i=0; i<nsends; i++){
5244       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5245       bufJ = bufj+sstartsj[i];
5246       for (j=0; j<nrows; j++) {
5247         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5248         for (ll=0; ll<sbs; ll++){
5249           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5250           for (l=0; l<ncols; l++){
5251             *bufJ++ = cols[l];
5252           }
5253           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5254         }
5255       }
5256       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5257     }
5258 
5259     /* recvs and sends of j-array are completed */
5260     i = nrecvs;
5261     while (i--) {
5262       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5263     }
5264     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5265   } else if (scall == MAT_REUSE_MATRIX){
5266     sstartsj = *startsj;
5267     rstartsj = *startsj_r;
5268     bufa     = *bufa_ptr;
5269     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5270     b_otha   = b_oth->a;
5271   } else {
5272     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5273   }
5274 
5275   /* a-array */
5276   /*---------*/
5277   /*  post receives of a-array */
5278   for (i=0; i<nrecvs; i++){
5279     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5280     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5281   }
5282 
5283   /* pack the outgoing message a-array */
5284   k = 0;
5285   for (i=0; i<nsends; i++){
5286     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5287     bufA = bufa+sstartsj[i];
5288     for (j=0; j<nrows; j++) {
5289       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5290       for (ll=0; ll<sbs; ll++){
5291         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5292         for (l=0; l<ncols; l++){
5293           *bufA++ = vals[l];
5294         }
5295         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5296       }
5297     }
5298     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5299   }
5300   /* recvs and sends of a-array are completed */
5301   i = nrecvs;
5302   while (i--) {
5303     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5304   }
5305   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5306   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5307 
5308   if (scall == MAT_INITIAL_MATRIX){
5309     /* put together the new matrix */
5310     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5311 
5312     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5313     /* Since these are PETSc arrays, change flags to free them as necessary. */
5314     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5315     b_oth->free_a  = PETSC_TRUE;
5316     b_oth->free_ij = PETSC_TRUE;
5317     b_oth->nonew   = 0;
5318 
5319     ierr = PetscFree(bufj);CHKERRQ(ierr);
5320     if (!startsj || !bufa_ptr){
5321       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5322       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5323     } else {
5324       *startsj   = sstartsj;
5325       *startsj_r = rstartsj;
5326       *bufa_ptr  = bufa;
5327     }
5328   }
5329   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5330   PetscFunctionReturn(0);
5331 }
5332 
5333 #undef __FUNCT__
5334 #define __FUNCT__ "MatGetCommunicationStructs"
5335 /*@C
5336   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5337 
5338   Not Collective
5339 
5340   Input Parameters:
5341 . A - The matrix in mpiaij format
5342 
5343   Output Parameter:
5344 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5345 . colmap - A map from global column index to local index into lvec
5346 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5347 
5348   Level: developer
5349 
5350 @*/
5351 #if defined (PETSC_USE_CTABLE)
5352 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5353 #else
5354 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5355 #endif
5356 {
5357   Mat_MPIAIJ *a;
5358 
5359   PetscFunctionBegin;
5360   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5361   PetscValidPointer(lvec, 2);
5362   PetscValidPointer(colmap, 3);
5363   PetscValidPointer(multScatter, 4);
5364   a = (Mat_MPIAIJ *) A->data;
5365   if (lvec) *lvec = a->lvec;
5366   if (colmap) *colmap = a->colmap;
5367   if (multScatter) *multScatter = a->Mvctx;
5368   PetscFunctionReturn(0);
5369 }
5370 
5371 EXTERN_C_BEGIN
5372 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5373 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5374 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5375 EXTERN_C_END
5376 
5377 #undef __FUNCT__
5378 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5379 /*
5380     Computes (B'*A')' since computing B*A directly is untenable
5381 
5382                n                       p                          p
5383         (              )       (              )         (                  )
5384       m (      A       )  *  n (       B      )   =   m (         C        )
5385         (              )       (              )         (                  )
5386 
5387 */
5388 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5389 {
5390   PetscErrorCode     ierr;
5391   Mat                At,Bt,Ct;
5392 
5393   PetscFunctionBegin;
5394   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5395   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5396   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5397   ierr = MatDestroy(&At);CHKERRQ(ierr);
5398   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5399   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5400   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5401   PetscFunctionReturn(0);
5402 }
5403 
5404 #undef __FUNCT__
5405 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5406 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5407 {
5408   PetscErrorCode ierr;
5409   PetscInt       m=A->rmap->n,n=B->cmap->n;
5410   Mat            Cmat;
5411 
5412   PetscFunctionBegin;
5413   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5414   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5415   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5416   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5417   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5418   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5419   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5420   *C   = Cmat;
5421   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5422   PetscFunctionReturn(0);
5423 }
5424 
5425 /* ----------------------------------------------------------------*/
5426 #undef __FUNCT__
5427 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5428 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5429 {
5430   PetscErrorCode ierr;
5431 
5432   PetscFunctionBegin;
5433   if (scall == MAT_INITIAL_MATRIX){
5434     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5435   }
5436   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5437   PetscFunctionReturn(0);
5438 }
5439 
5440 EXTERN_C_BEGIN
5441 #if defined(PETSC_HAVE_MUMPS)
5442 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5443 #endif
5444 #if defined(PETSC_HAVE_PASTIX)
5445 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5446 #endif
5447 #if defined(PETSC_HAVE_SUPERLU_DIST)
5448 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5449 #endif
5450 #if defined(PETSC_HAVE_SPOOLES)
5451 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5452 #endif
5453 EXTERN_C_END
5454 
5455 /*MC
5456    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5457 
5458    Options Database Keys:
5459 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5460 
5461   Level: beginner
5462 
5463 .seealso: MatCreateMPIAIJ()
5464 M*/
5465 
5466 EXTERN_C_BEGIN
5467 #undef __FUNCT__
5468 #define __FUNCT__ "MatCreate_MPIAIJ"
5469 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5470 {
5471   Mat_MPIAIJ     *b;
5472   PetscErrorCode ierr;
5473   PetscMPIInt    size;
5474 
5475   PetscFunctionBegin;
5476   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5477 
5478   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5479   B->data         = (void*)b;
5480   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5481   B->rmap->bs     = 1;
5482   B->assembled    = PETSC_FALSE;
5483 
5484   B->insertmode   = NOT_SET_VALUES;
5485   b->size         = size;
5486   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5487 
5488   /* build cache for off array entries formed */
5489   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5490   b->donotstash  = PETSC_FALSE;
5491   b->colmap      = 0;
5492   b->garray      = 0;
5493   b->roworiented = PETSC_TRUE;
5494 
5495   /* stuff used for matrix vector multiply */
5496   b->lvec      = PETSC_NULL;
5497   b->Mvctx     = PETSC_NULL;
5498 
5499   /* stuff for MatGetRow() */
5500   b->rowindices   = 0;
5501   b->rowvalues    = 0;
5502   b->getrowactive = PETSC_FALSE;
5503 
5504 #if defined(PETSC_HAVE_SPOOLES)
5505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5506                                      "MatGetFactor_mpiaij_spooles",
5507                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5508 #endif
5509 #if defined(PETSC_HAVE_MUMPS)
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5511                                      "MatGetFactor_aij_mumps",
5512                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5513 #endif
5514 #if defined(PETSC_HAVE_PASTIX)
5515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5516 					   "MatGetFactor_mpiaij_pastix",
5517 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5518 #endif
5519 #if defined(PETSC_HAVE_SUPERLU_DIST)
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5521                                      "MatGetFactor_mpiaij_superlu_dist",
5522                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5523 #endif
5524   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5525                                      "MatStoreValues_MPIAIJ",
5526                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5528                                      "MatRetrieveValues_MPIAIJ",
5529                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5531 				     "MatGetDiagonalBlock_MPIAIJ",
5532                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5534 				     "MatIsTranspose_MPIAIJ",
5535 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5536   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5537 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5538 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5539   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5540 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5541 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5543 				     "MatDiagonalScaleLocal_MPIAIJ",
5544 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5546                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5547                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5549                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5550                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5552                                      "MatConvert_MPIAIJ_MPISBAIJ",
5553                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5555                                      "MatMatMult_MPIDense_MPIAIJ",
5556                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5558                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5559                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5561                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5562                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5563   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5564   PetscFunctionReturn(0);
5565 }
5566 EXTERN_C_END
5567 
5568 #undef __FUNCT__
5569 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5570 /*@
5571      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5572          and "off-diagonal" part of the matrix in CSR format.
5573 
5574    Collective on MPI_Comm
5575 
5576    Input Parameters:
5577 +  comm - MPI communicator
5578 .  m - number of local rows (Cannot be PETSC_DECIDE)
5579 .  n - This value should be the same as the local size used in creating the
5580        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5581        calculated if N is given) For square matrices n is almost always m.
5582 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5583 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5584 .   i - row indices for "diagonal" portion of matrix
5585 .   j - column indices
5586 .   a - matrix values
5587 .   oi - row indices for "off-diagonal" portion of matrix
5588 .   oj - column indices
5589 -   oa - matrix values
5590 
5591    Output Parameter:
5592 .   mat - the matrix
5593 
5594    Level: advanced
5595 
5596    Notes:
5597        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5598        must free the arrays once the matrix has been destroyed and not before.
5599 
5600        The i and j indices are 0 based
5601 
5602        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5603 
5604        This sets local rows and cannot be used to set off-processor values.
5605 
5606        You cannot later use MatSetValues() to change values in this matrix.
5607 
5608 .keywords: matrix, aij, compressed row, sparse, parallel
5609 
5610 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5611           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5612 @*/
5613 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5614 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5615 {
5616   PetscErrorCode ierr;
5617   Mat_MPIAIJ     *maij;
5618 
5619  PetscFunctionBegin;
5620   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5621   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5622   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5623   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5624   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5625   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5626   maij = (Mat_MPIAIJ*) (*mat)->data;
5627   maij->donotstash     = PETSC_TRUE;
5628   (*mat)->preallocated = PETSC_TRUE;
5629 
5630   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5631   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5632   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5633   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5634 
5635   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5636   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5637 
5638   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5639   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5640   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5641   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5642 
5643   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5644   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5645   PetscFunctionReturn(0);
5646 }
5647 
5648 /*
5649     Special version for direct calls from Fortran
5650 */
5651 #include <private/fortranimpl.h>
5652 
5653 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5654 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5655 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5656 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5657 #endif
5658 
5659 /* Change these macros so can be used in void function */
5660 #undef CHKERRQ
5661 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5662 #undef SETERRQ2
5663 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5664 #undef SETERRQ
5665 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5666 
5667 EXTERN_C_BEGIN
5668 #undef __FUNCT__
5669 #define __FUNCT__ "matsetvaluesmpiaij_"
5670 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5671 {
5672   Mat             mat = *mmat;
5673   PetscInt        m = *mm, n = *mn;
5674   InsertMode      addv = *maddv;
5675   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5676   PetscScalar     value;
5677   PetscErrorCode  ierr;
5678 
5679   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5680   if (mat->insertmode == NOT_SET_VALUES) {
5681     mat->insertmode = addv;
5682   }
5683 #if defined(PETSC_USE_DEBUG)
5684   else if (mat->insertmode != addv) {
5685     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5686   }
5687 #endif
5688   {
5689   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5690   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5691   PetscBool       roworiented = aij->roworiented;
5692 
5693   /* Some Variables required in the macro */
5694   Mat             A = aij->A;
5695   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5696   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5697   MatScalar       *aa = a->a;
5698   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5699   Mat             B = aij->B;
5700   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5701   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5702   MatScalar       *ba = b->a;
5703 
5704   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5705   PetscInt        nonew = a->nonew;
5706   MatScalar       *ap1,*ap2;
5707 
5708   PetscFunctionBegin;
5709   for (i=0; i<m; i++) {
5710     if (im[i] < 0) continue;
5711 #if defined(PETSC_USE_DEBUG)
5712     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5713 #endif
5714     if (im[i] >= rstart && im[i] < rend) {
5715       row      = im[i] - rstart;
5716       lastcol1 = -1;
5717       rp1      = aj + ai[row];
5718       ap1      = aa + ai[row];
5719       rmax1    = aimax[row];
5720       nrow1    = ailen[row];
5721       low1     = 0;
5722       high1    = nrow1;
5723       lastcol2 = -1;
5724       rp2      = bj + bi[row];
5725       ap2      = ba + bi[row];
5726       rmax2    = bimax[row];
5727       nrow2    = bilen[row];
5728       low2     = 0;
5729       high2    = nrow2;
5730 
5731       for (j=0; j<n; j++) {
5732         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5733         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5734         if (in[j] >= cstart && in[j] < cend){
5735           col = in[j] - cstart;
5736           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5737         } else if (in[j] < 0) continue;
5738 #if defined(PETSC_USE_DEBUG)
5739         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5740 #endif
5741         else {
5742           if (mat->was_assembled) {
5743             if (!aij->colmap) {
5744               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5745             }
5746 #if defined (PETSC_USE_CTABLE)
5747             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5748 	    col--;
5749 #else
5750             col = aij->colmap[in[j]] - 1;
5751 #endif
5752             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5753               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5754               col =  in[j];
5755               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5756               B = aij->B;
5757               b = (Mat_SeqAIJ*)B->data;
5758               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5759               rp2      = bj + bi[row];
5760               ap2      = ba + bi[row];
5761               rmax2    = bimax[row];
5762               nrow2    = bilen[row];
5763               low2     = 0;
5764               high2    = nrow2;
5765               bm       = aij->B->rmap->n;
5766               ba = b->a;
5767             }
5768           } else col = in[j];
5769           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5770         }
5771       }
5772     } else {
5773       if (!aij->donotstash) {
5774         if (roworiented) {
5775           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5776         } else {
5777           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5778         }
5779       }
5780     }
5781   }}
5782   PetscFunctionReturnVoid();
5783 }
5784 EXTERN_C_END
5785 
5786