xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 880ab1daf7d939c68ccf6cea32092b2660c3708a)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine number diagonal and off-diagonal counts */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine number diagonal and off-diagonal counts */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it it is not scalable (each processor
331 has an order N integer array but is fast to acess.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselfs, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal allocating
843      new memory, thus calling lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added in yy until the next line, */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195    PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302   FILE              *file;
1303 
1304   PetscFunctionBegin;
1305   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1307   nz   = A->nz + B->nz;
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) {
1319       rlen = PetscMax(rlen,range[i+1] - range[i]);
1320     }
1321   } else {
1322     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1323     rlen = mat->rmap->n;
1324   }
1325 
1326   /* load up the local row counts */
1327   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1328   for (i=0; i<mat->rmap->n; i++) {
1329     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1330   }
1331 
1332   /* store the row lengths to the file */
1333   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1334   if (!rank) {
1335     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1338       rlen = range[i+1] - range[i];
1339       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1345     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1349 
1350   /* load up the local column indices */
1351   nzmax = nz; /* )th processor needs space a largest processor needs */
1352   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1353   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1354   cnt  = 0;
1355   for (i=0; i<mat->rmap->n; i++) {
1356     for (j=B->i[i]; j<B->i[i+1]; j++) {
1357       if ( (col = garray[B->j[j]]) > cstart) break;
1358       column_indices[cnt++] = col;
1359     }
1360     for (k=A->i[i]; k<A->i[i+1]; k++) {
1361       column_indices[cnt++] = A->j[k] + cstart;
1362     }
1363     for (; j<B->i[i+1]; j++) {
1364       column_indices[cnt++] = garray[B->j[j]];
1365     }
1366   }
1367   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368 
1369   /* store the column indices to the file */
1370    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1371   if (!rank) {
1372     MPI_Status status;
1373     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     for (i=1; i<size; i++) {
1375       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1376       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1377       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1378       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1382   } else {
1383     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1384     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1386     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1387   }
1388   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1389 
1390   /* load up the local column values */
1391   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1392   cnt  = 0;
1393   for (i=0; i<mat->rmap->n; i++) {
1394     for (j=B->i[i]; j<B->i[i+1]; j++) {
1395       if ( garray[B->j[j]] > cstart) break;
1396       column_values[cnt++] = B->a[j];
1397     }
1398     for (k=A->i[i]; k<A->i[i+1]; k++) {
1399       column_values[cnt++] = A->a[k];
1400     }
1401     for (; j<B->i[i+1]; j++) {
1402       column_values[cnt++] = B->a[j];
1403     }
1404   }
1405   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406 
1407   /* store the column values to the file */
1408    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1409   if (!rank) {
1410     MPI_Status status;
1411     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     for (i=1; i<size; i++) {
1413        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1414       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1415       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1416       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1420   } else {
1421     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1422     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1425   }
1426   ierr = PetscFree(column_values);CHKERRQ(ierr);
1427 
1428   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1429   if (file) {
1430     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1439 {
1440   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1441   PetscErrorCode    ierr;
1442   PetscMPIInt       rank = aij->rank,size = aij->size;
1443   PetscBool         isdraw,iascii,isbinary;
1444   PetscViewer       sviewer;
1445   PetscViewerFormat format;
1446 
1447   PetscFunctionBegin;
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1451   if (iascii) {
1452     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1453     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1454       MatInfo    info;
1455       PetscBool  inodes;
1456 
1457       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1458       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1461       if (!inodes) {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1463 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1464       } else {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1466 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       }
1468       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1472       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1475       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1478       PetscInt   inodecount,inodelimit,*inodes;
1479       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1480       if (inodes) {
1481         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1482       } else {
1483         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1484       }
1485       PetscFunctionReturn(0);
1486     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1487       PetscFunctionReturn(0);
1488     }
1489   } else if (isbinary) {
1490     if (size == 1) {
1491       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1492       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1493     } else {
1494       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1495     }
1496     PetscFunctionReturn(0);
1497   } else if (isdraw) {
1498     PetscDraw  draw;
1499     PetscBool  isnull;
1500     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1501     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1502   }
1503 
1504   if (size == 1) {
1505     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507   } else {
1508     /* assemble the entire matrix onto first processor. */
1509     Mat         A;
1510     Mat_SeqAIJ  *Aloc;
1511     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1512     MatScalar   *a;
1513 
1514     if (mat->rmap->N > 1024) {
1515       PetscBool  flg = PETSC_FALSE;
1516 
1517       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1518       if (!flg) {
1519         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1520       }
1521     }
1522 
1523     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1524     if (!rank) {
1525       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1526     } else {
1527       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1528     }
1529     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1530     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1531     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1532     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1533     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1534 
1535     /* copy over the A part */
1536     Aloc = (Mat_SeqAIJ*)aij->A->data;
1537     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row = mat->rmap->rstart;
1539     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1543     }
1544     aj = Aloc->j;
1545     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1546 
1547     /* copy over the B part */
1548     Aloc = (Mat_SeqAIJ*)aij->B->data;
1549     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550     row  = mat->rmap->rstart;
1551     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552     ct   = cols;
1553     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1554     for (i=0; i<m; i++) {
1555       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1556       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1557     }
1558     ierr = PetscFree(ct);CHKERRQ(ierr);
1559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     /*
1562        Everyone has to call to draw the matrix since the graphics waits are
1563        synchronized across all processors that share the PetscDraw object
1564     */
1565     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     if (!rank) {
1567       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1568       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1569       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1570       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1571     }
1572     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     ierr = MatDestroy(&A);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatView_MPIAIJ"
1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1581 {
1582   PetscErrorCode ierr;
1583   PetscBool      iascii,isdraw,issocket,isbinary;
1584 
1585   PetscFunctionBegin;
1586   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1587   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1590   if (iascii || isdraw || isbinary || issocket) {
1591     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
1608   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1610   }
1611 
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   }  else if (flag & SOR_EISENSTAT) {
1667     Vec         xx1;
1668 
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   MPI_Comm       comm;
1703   PetscInt       first,local_rowsize,local_colsize;
1704   const PetscInt *rows;
1705   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1710   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1711   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1712   /* collect the global row permutation and invert it */
1713   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1714   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1715   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1718   /* get the local target indices */
1719   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1721   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1723   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1724   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1725   /* the column permutation is so much easier;
1726      make a local version of 'colp' and invert it */
1727   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1728   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1729   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1731   /* now we just get the submatrix */
1732   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1733   /* clean up */
1734   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1735   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscReal      isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size     = 1.0;
1750   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1755   isend[3] += info->memory;  isend[4] += info->mallocs;
1756   if (flag == MAT_LOCAL) {
1757     info->nz_used      = isend[0];
1758     info->nz_allocated = isend[1];
1759     info->nz_unneeded  = isend[2];
1760     info->memory       = isend[3];
1761     info->mallocs      = isend[4];
1762   } else if (flag == MAT_GLOBAL_MAX) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   } else if (flag == MAT_GLOBAL_SUM) {
1770     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   }
1777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1778   info->fill_ratio_needed = 0;
1779   info->factor_mallocs    = 0;
1780 
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetOption_MPIAIJ"
1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1787 {
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   switch (op) {
1793   case MAT_NEW_NONZERO_LOCATIONS:
1794   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1795   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1796   case MAT_KEEP_NONZERO_PATTERN:
1797   case MAT_NEW_NONZERO_LOCATION_ERR:
1798   case MAT_USE_INODES:
1799   case MAT_IGNORE_ZERO_ENTRIES:
1800     MatCheckPreallocated(A,1);
1801     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1802     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1803     break;
1804   case MAT_ROW_ORIENTED:
1805     a->roworiented = flg;
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_NEW_DIAGONALS:
1810     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1811     break;
1812   case MAT_IGNORE_OFF_PROC_ENTRIES:
1813     a->donotstash = flg;
1814     break;
1815   case MAT_SPD:
1816     A->spd_set                         = PETSC_TRUE;
1817     A->spd                             = flg;
1818     if (flg) {
1819       A->symmetric                     = PETSC_TRUE;
1820       A->structurally_symmetric        = PETSC_TRUE;
1821       A->symmetric_set                 = PETSC_TRUE;
1822       A->structurally_symmetric_set    = PETSC_TRUE;
1823     }
1824     break;
1825   case MAT_SYMMETRIC:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1830     break;
1831   case MAT_HERMITIAN:
1832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1833     break;
1834   case MAT_SYMMETRY_ETERNAL:
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837 #if defined(PETSC_HAVE_TXPETSCGPU)
1838     // Sorry, I don't know how else to add MatOptions for data specific
1839     // to GPU classes without protecting against it in this case statement.
1840     // If anyone knows of a better way, let me know. Paul
1841   case MAT_DIAGBLOCK_CSR:
1842   case MAT_OFFDIAGBLOCK_CSR:
1843   case MAT_CSR:
1844   case MAT_DIAGBLOCK_DIA:
1845   case MAT_OFFDIAGBLOCK_DIA:
1846   case MAT_DIA:
1847   case MAT_DIAGBLOCK_ELL:
1848   case MAT_OFFDIAGBLOCK_ELL:
1849   case MAT_ELL:
1850   case MAT_DIAGBLOCK_HYB:
1851   case MAT_OFFDIAGBLOCK_HYB:
1852   case MAT_HYB:
1853     /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */
1854     break;
1855 #endif
1856   default:
1857     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1858   }
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatGetRow_MPIAIJ"
1864 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1865 {
1866   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1867   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1868   PetscErrorCode ierr;
1869   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1870   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1871   PetscInt       *cmap,*idx_p;
1872 
1873   PetscFunctionBegin;
1874   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1875   mat->getrowactive = PETSC_TRUE;
1876 
1877   if (!mat->rowvalues && (idx || v)) {
1878     /*
1879         allocate enough space to hold information from the longest row.
1880     */
1881     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1882     PetscInt   max = 1,tmp;
1883     for (i=0; i<matin->rmap->n; i++) {
1884       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1885       if (max < tmp) { max = tmp; }
1886     }
1887     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1888   }
1889 
1890   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1891   lrow = row - rstart;
1892 
1893   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1894   if (!v)   {pvA = 0; pvB = 0;}
1895   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1896   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1897   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1898   nztot = nzA + nzB;
1899 
1900   cmap  = mat->garray;
1901   if (v  || idx) {
1902     if (nztot) {
1903       /* Sort by increasing column numbers, assuming A and B already sorted */
1904       PetscInt imark = -1;
1905       if (v) {
1906         *v = v_p = mat->rowvalues;
1907         for (i=0; i<nzB; i++) {
1908           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1909           else break;
1910         }
1911         imark = i;
1912         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1913         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1914       }
1915       if (idx) {
1916         *idx = idx_p = mat->rowindices;
1917         if (imark > -1) {
1918           for (i=0; i<imark; i++) {
1919             idx_p[i] = cmap[cworkB[i]];
1920           }
1921         } else {
1922           for (i=0; i<nzB; i++) {
1923             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1924             else break;
1925           }
1926           imark = i;
1927         }
1928         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1929         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1930       }
1931     } else {
1932       if (idx) *idx = 0;
1933       if (v)   *v   = 0;
1934     }
1935   }
1936   *nz = nztot;
1937   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1938   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1944 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1945 {
1946   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1947 
1948   PetscFunctionBegin;
1949   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1950   aij->getrowactive = PETSC_FALSE;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatNorm_MPIAIJ"
1956 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1957 {
1958   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1959   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1960   PetscErrorCode ierr;
1961   PetscInt       i,j,cstart = mat->cmap->rstart;
1962   PetscReal      sum = 0.0;
1963   MatScalar      *v;
1964 
1965   PetscFunctionBegin;
1966   if (aij->size == 1) {
1967     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1968   } else {
1969     if (type == NORM_FROBENIUS) {
1970       v = amat->a;
1971       for (i=0; i<amat->nz; i++) {
1972 #if defined(PETSC_USE_COMPLEX)
1973         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1974 #else
1975         sum += (*v)*(*v); v++;
1976 #endif
1977       }
1978       v = bmat->a;
1979       for (i=0; i<bmat->nz; i++) {
1980 #if defined(PETSC_USE_COMPLEX)
1981         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1982 #else
1983         sum += (*v)*(*v); v++;
1984 #endif
1985       }
1986       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1987       *norm = PetscSqrtReal(*norm);
1988     } else if (type == NORM_1) { /* max column norm */
1989       PetscReal *tmp,*tmp2;
1990       PetscInt  *jj,*garray = aij->garray;
1991       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1992       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1993       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1994       *norm = 0.0;
1995       v = amat->a; jj = amat->j;
1996       for (j=0; j<amat->nz; j++) {
1997         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1998       }
1999       v = bmat->a; jj = bmat->j;
2000       for (j=0; j<bmat->nz; j++) {
2001         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2002       }
2003       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2004       for (j=0; j<mat->cmap->N; j++) {
2005         if (tmp2[j] > *norm) *norm = tmp2[j];
2006       }
2007       ierr = PetscFree(tmp);CHKERRQ(ierr);
2008       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2009     } else if (type == NORM_INFINITY) { /* max row norm */
2010       PetscReal ntemp = 0.0;
2011       for (j=0; j<aij->A->rmap->n; j++) {
2012         v = amat->a + amat->i[j];
2013         sum = 0.0;
2014         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2015           sum += PetscAbsScalar(*v); v++;
2016         }
2017         v = bmat->a + bmat->i[j];
2018         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2019           sum += PetscAbsScalar(*v); v++;
2020         }
2021         if (sum > ntemp) ntemp = sum;
2022       }
2023       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2024     } else {
2025       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2026     }
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatTranspose_MPIAIJ"
2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2034 {
2035   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2036   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2037   PetscErrorCode ierr;
2038   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2039   PetscInt       cstart=A->cmap->rstart,ncol;
2040   Mat            B;
2041   MatScalar      *array;
2042 
2043   PetscFunctionBegin;
2044   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2045 
2046   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2047   ai = Aloc->i; aj = Aloc->j;
2048   bi = Bloc->i; bj = Bloc->j;
2049   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2050     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2051     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2052     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2053     for (i=0; i<ai[ma]; i++){
2054       d_nnz[aj[i]] ++;
2055       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2056     }
2057 
2058     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2059     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2060     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2061     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2062     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2063     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2064     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2065   } else {
2066     B = *matout;
2067     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2068     for (i=0; i<ai[ma]; i++){
2069       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2070     }
2071   }
2072 
2073   /* copy over the A part */
2074   array = Aloc->a;
2075   row = A->rmap->rstart;
2076   for (i=0; i<ma; i++) {
2077     ncol = ai[i+1]-ai[i];
2078     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2079     row++; array += ncol; aj += ncol;
2080   }
2081   aj = Aloc->j;
2082   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2083 
2084   /* copy over the B part */
2085   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2086   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2087   array = Bloc->a;
2088   row = A->rmap->rstart;
2089   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2090   cols_tmp = cols;
2091   for (i=0; i<mb; i++) {
2092     ncol = bi[i+1]-bi[i];
2093     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2094     row++; array += ncol; cols_tmp += ncol;
2095   }
2096   ierr = PetscFree(cols);CHKERRQ(ierr);
2097 
2098   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2099   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2100   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2101     *matout = B;
2102   } else {
2103     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2104   }
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2110 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2111 {
2112   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2113   Mat            a = aij->A,b = aij->B;
2114   PetscErrorCode ierr;
2115   PetscInt       s1,s2,s3;
2116 
2117   PetscFunctionBegin;
2118   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2119   if (rr) {
2120     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2121     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2122     /* Overlap communication with computation. */
2123     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2124   }
2125   if (ll) {
2126     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2127     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2128     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2129   }
2130   /* scale  the diagonal block */
2131   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2132 
2133   if (rr) {
2134     /* Do a scatter end and then right scale the off-diagonal block */
2135     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2136     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2137   }
2138 
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2144 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2145 {
2146   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatEqual_MPIAIJ"
2156 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2157 {
2158   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2159   Mat            a,b,c,d;
2160   PetscBool      flg;
2161   PetscErrorCode ierr;
2162 
2163   PetscFunctionBegin;
2164   a = matA->A; b = matA->B;
2165   c = matB->A; d = matB->B;
2166 
2167   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2168   if (flg) {
2169     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2170   }
2171   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatCopy_MPIAIJ"
2177 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2178 {
2179   PetscErrorCode ierr;
2180   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2181   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2182 
2183   PetscFunctionBegin;
2184   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2185   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2186     /* because of the column compression in the off-processor part of the matrix a->B,
2187        the number of columns in a->B and b->B may be different, hence we cannot call
2188        the MatCopy() directly on the two parts. If need be, we can provide a more
2189        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2190        then copying the submatrices */
2191     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2192   } else {
2193     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2194     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2195   }
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatSetUp_MPIAIJ"
2201 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2202 {
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2212 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2213 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2214 {
2215   PetscInt          i,m=Y->rmap->N;
2216   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2217   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2218   const PetscInt    *xi = x->i,*yi = y->i;
2219 
2220   PetscFunctionBegin;
2221   /* Set the number of nonzeros in the new matrix */
2222   for(i=0; i<m; i++) {
2223     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2224     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2225     nnz[i] = 0;
2226     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2227       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2228       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2229       nnz[i]++;
2230     }
2231     for (; k<nzy; k++) nnz[i]++;
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatAXPY_MPIAIJ"
2238 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2239 {
2240   PetscErrorCode ierr;
2241   PetscInt       i;
2242   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2243   PetscBLASInt   bnz,one=1;
2244   Mat_SeqAIJ     *x,*y;
2245 
2246   PetscFunctionBegin;
2247   if (str == SAME_NONZERO_PATTERN) {
2248     PetscScalar alpha = a;
2249     x = (Mat_SeqAIJ *)xx->A->data;
2250     y = (Mat_SeqAIJ *)yy->A->data;
2251     bnz = PetscBLASIntCast(x->nz);
2252     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2253     x = (Mat_SeqAIJ *)xx->B->data;
2254     y = (Mat_SeqAIJ *)yy->B->data;
2255     bnz = PetscBLASIntCast(x->nz);
2256     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2257   } else if (str == SUBSET_NONZERO_PATTERN) {
2258     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2259 
2260     x = (Mat_SeqAIJ *)xx->B->data;
2261     y = (Mat_SeqAIJ *)yy->B->data;
2262     if (y->xtoy && y->XtoY != xx->B) {
2263       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2264       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2265     }
2266     if (!y->xtoy) { /* get xtoy */
2267       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2268       y->XtoY = xx->B;
2269       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2270     }
2271     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2272   } else {
2273     Mat B;
2274     PetscInt *nnz_d,*nnz_o;
2275     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2276     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2277     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2278     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2279     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2280     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2281     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2282     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2283     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2284     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2285     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2286     ierr = MatHeaderReplace(Y,B);
2287     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2288     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatConjugate_MPIAIJ"
2297 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2298 {
2299 #if defined(PETSC_USE_COMPLEX)
2300   PetscErrorCode ierr;
2301   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2302 
2303   PetscFunctionBegin;
2304   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2305   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2306 #else
2307   PetscFunctionBegin;
2308 #endif
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 #undef __FUNCT__
2313 #define __FUNCT__ "MatRealPart_MPIAIJ"
2314 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2315 {
2316   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2321   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2327 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2328 {
2329   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2330   PetscErrorCode ierr;
2331 
2332   PetscFunctionBegin;
2333   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2334   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #ifdef PETSC_HAVE_PBGL
2339 
2340 #include <boost/parallel/mpi/bsp_process_group.hpp>
2341 #include <boost/graph/distributed/ilu_default_graph.hpp>
2342 #include <boost/graph/distributed/ilu_0_block.hpp>
2343 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2344 #include <boost/graph/distributed/petsc/interface.hpp>
2345 #include <boost/multi_array.hpp>
2346 #include <boost/parallel/distributed_property_map->hpp>
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2350 /*
2351   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2352 */
2353 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2354 {
2355   namespace petsc = boost::distributed::petsc;
2356 
2357   namespace graph_dist = boost::graph::distributed;
2358   using boost::graph::distributed::ilu_default::process_group_type;
2359   using boost::graph::ilu_permuted;
2360 
2361   PetscBool       row_identity, col_identity;
2362   PetscContainer  c;
2363   PetscInt        m, n, M, N;
2364   PetscErrorCode  ierr;
2365 
2366   PetscFunctionBegin;
2367   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2368   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2369   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2370   if (!row_identity || !col_identity) {
2371     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2372   }
2373 
2374   process_group_type pg;
2375   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2376   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2377   lgraph_type&   level_graph = *lgraph_p;
2378   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2379 
2380   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2381   ilu_permuted(level_graph);
2382 
2383   /* put together the new matrix */
2384   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2385   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2386   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2387   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2388   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2389   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2390   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392 
2393   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2394   ierr = PetscContainerSetPointer(c, lgraph_p);
2395   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2396   ierr = PetscContainerDestroy(&c);
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2402 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2403 {
2404   PetscFunctionBegin;
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatSolve_MPIAIJ"
2410 /*
2411   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2412 */
2413 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2414 {
2415   namespace graph_dist = boost::graph::distributed;
2416 
2417   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2418   lgraph_type*   lgraph_p;
2419   PetscContainer c;
2420   PetscErrorCode ierr;
2421 
2422   PetscFunctionBegin;
2423   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2424   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2425   ierr = VecCopy(b, x);CHKERRQ(ierr);
2426 
2427   PetscScalar* array_x;
2428   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2429   PetscInt sx;
2430   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2431 
2432   PetscScalar* array_b;
2433   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2434   PetscInt sb;
2435   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2436 
2437   lgraph_type&   level_graph = *lgraph_p;
2438   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2439 
2440   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2441   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2442                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2443 
2444   typedef boost::iterator_property_map<array_ref_type::iterator,
2445                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2446   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2447                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2448 
2449   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2450 
2451   PetscFunctionReturn(0);
2452 }
2453 #endif
2454 
2455 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2456   PetscInt       nzlocal,nsends,nrecvs;
2457   PetscMPIInt    *send_rank,*recv_rank;
2458   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2459   PetscScalar    *sbuf_a,**rbuf_a;
2460   PetscErrorCode (*Destroy)(Mat);
2461 } Mat_Redundant;
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2465 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2466 {
2467   PetscErrorCode       ierr;
2468   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2469   PetscInt             i;
2470 
2471   PetscFunctionBegin;
2472   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2473   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2474   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2475   for (i=0; i<redund->nrecvs; i++){
2476     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2477     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2478   }
2479   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2480   ierr = PetscFree(redund);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatDestroy_MatRedundant"
2486 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2487 {
2488   PetscErrorCode  ierr;
2489   PetscContainer  container;
2490   Mat_Redundant   *redund=PETSC_NULL;
2491 
2492   PetscFunctionBegin;
2493   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2494   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2495   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2496   A->ops->destroy = redund->Destroy;
2497   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2498   if (A->ops->destroy) {
2499     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2506 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2507 {
2508   PetscMPIInt    rank,size;
2509   MPI_Comm       comm=((PetscObject)mat)->comm;
2510   PetscErrorCode ierr;
2511   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2512   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2513   PetscInt       *rowrange=mat->rmap->range;
2514   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2515   Mat            A=aij->A,B=aij->B,C=*matredundant;
2516   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2517   PetscScalar    *sbuf_a;
2518   PetscInt       nzlocal=a->nz+b->nz;
2519   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2520   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2521   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2522   MatScalar      *aworkA,*aworkB;
2523   PetscScalar    *vals;
2524   PetscMPIInt    tag1,tag2,tag3,imdex;
2525   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2526                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2527   MPI_Status     recv_status,*send_status;
2528   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2529   PetscInt       **rbuf_j=PETSC_NULL;
2530   PetscScalar    **rbuf_a=PETSC_NULL;
2531   Mat_Redundant  *redund=PETSC_NULL;
2532   PetscContainer container;
2533 
2534   PetscFunctionBegin;
2535   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2537 
2538   if (reuse == MAT_REUSE_MATRIX) {
2539     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2540     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2541     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2542     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2543     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2544     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2545     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2546     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2547 
2548     nsends    = redund->nsends;
2549     nrecvs    = redund->nrecvs;
2550     send_rank = redund->send_rank;
2551     recv_rank = redund->recv_rank;
2552     sbuf_nz   = redund->sbuf_nz;
2553     rbuf_nz   = redund->rbuf_nz;
2554     sbuf_j    = redund->sbuf_j;
2555     sbuf_a    = redund->sbuf_a;
2556     rbuf_j    = redund->rbuf_j;
2557     rbuf_a    = redund->rbuf_a;
2558   }
2559 
2560   if (reuse == MAT_INITIAL_MATRIX){
2561     PetscMPIInt  subrank,subsize;
2562     PetscInt     nleftover,np_subcomm;
2563     /* get the destination processors' id send_rank, nsends and nrecvs */
2564     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2565     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2566     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2567     np_subcomm = size/nsubcomm;
2568     nleftover  = size - nsubcomm*np_subcomm;
2569     nsends = 0; nrecvs = 0;
2570     for (i=0; i<size; i++){ /* i=rank*/
2571       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2572         send_rank[nsends] = i; nsends++;
2573         recv_rank[nrecvs++] = i;
2574       }
2575     }
2576     if (rank >= size - nleftover){/* this proc is a leftover processor */
2577       i = size-nleftover-1;
2578       j = 0;
2579       while (j < nsubcomm - nleftover){
2580         send_rank[nsends++] = i;
2581         i--; j++;
2582       }
2583     }
2584 
2585     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2586       for (i=0; i<nleftover; i++){
2587         recv_rank[nrecvs++] = size-nleftover+i;
2588       }
2589     }
2590 
2591     /* allocate sbuf_j, sbuf_a */
2592     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2593     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2594     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2595   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2596 
2597   /* copy mat's local entries into the buffers */
2598   if (reuse == MAT_INITIAL_MATRIX){
2599     rownz_max = 0;
2600     rptr = sbuf_j;
2601     cols = sbuf_j + rend-rstart + 1;
2602     vals = sbuf_a;
2603     rptr[0] = 0;
2604     for (i=0; i<rend-rstart; i++){
2605       row = i + rstart;
2606       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2607       ncols  = nzA + nzB;
2608       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2609       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2610       /* load the column indices for this row into cols */
2611       lwrite = 0;
2612       for (l=0; l<nzB; l++) {
2613         if ((ctmp = bmap[cworkB[l]]) < cstart){
2614           vals[lwrite]   = aworkB[l];
2615           cols[lwrite++] = ctmp;
2616         }
2617       }
2618       for (l=0; l<nzA; l++){
2619         vals[lwrite]   = aworkA[l];
2620         cols[lwrite++] = cstart + cworkA[l];
2621       }
2622       for (l=0; l<nzB; l++) {
2623         if ((ctmp = bmap[cworkB[l]]) >= cend){
2624           vals[lwrite]   = aworkB[l];
2625           cols[lwrite++] = ctmp;
2626         }
2627       }
2628       vals += ncols;
2629       cols += ncols;
2630       rptr[i+1] = rptr[i] + ncols;
2631       if (rownz_max < ncols) rownz_max = ncols;
2632     }
2633     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2634   } else { /* only copy matrix values into sbuf_a */
2635     rptr = sbuf_j;
2636     vals = sbuf_a;
2637     rptr[0] = 0;
2638     for (i=0; i<rend-rstart; i++){
2639       row = i + rstart;
2640       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2641       ncols  = nzA + nzB;
2642       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2643       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2644       lwrite = 0;
2645       for (l=0; l<nzB; l++) {
2646         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2647       }
2648       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2649       for (l=0; l<nzB; l++) {
2650         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2651       }
2652       vals += ncols;
2653       rptr[i+1] = rptr[i] + ncols;
2654     }
2655   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2656 
2657   /* send nzlocal to others, and recv other's nzlocal */
2658   /*--------------------------------------------------*/
2659   if (reuse == MAT_INITIAL_MATRIX){
2660     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2661     s_waits2 = s_waits3 + nsends;
2662     s_waits1 = s_waits2 + nsends;
2663     r_waits1 = s_waits1 + nsends;
2664     r_waits2 = r_waits1 + nrecvs;
2665     r_waits3 = r_waits2 + nrecvs;
2666   } else {
2667     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2668     r_waits3 = s_waits3 + nsends;
2669   }
2670 
2671   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2672   if (reuse == MAT_INITIAL_MATRIX){
2673     /* get new tags to keep the communication clean */
2674     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2675     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2676     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2677 
2678     /* post receives of other's nzlocal */
2679     for (i=0; i<nrecvs; i++){
2680       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2681     }
2682     /* send nzlocal to others */
2683     for (i=0; i<nsends; i++){
2684       sbuf_nz[i] = nzlocal;
2685       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2686     }
2687     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2688     count = nrecvs;
2689     while (count) {
2690       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2691       recv_rank[imdex] = recv_status.MPI_SOURCE;
2692       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2693       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2694 
2695       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2696       rbuf_nz[imdex] += i + 2;
2697       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2698       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2699       count--;
2700     }
2701     /* wait on sends of nzlocal */
2702     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2703     /* send mat->i,j to others, and recv from other's */
2704     /*------------------------------------------------*/
2705     for (i=0; i<nsends; i++){
2706       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2707       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2708     }
2709     /* wait on receives of mat->i,j */
2710     /*------------------------------*/
2711     count = nrecvs;
2712     while (count) {
2713       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2714       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2715       count--;
2716     }
2717     /* wait on sends of mat->i,j */
2718     /*---------------------------*/
2719     if (nsends) {
2720       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2721     }
2722   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2723 
2724   /* post receives, send and receive mat->a */
2725   /*----------------------------------------*/
2726   for (imdex=0; imdex<nrecvs; imdex++) {
2727     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2728   }
2729   for (i=0; i<nsends; i++){
2730     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2731   }
2732   count = nrecvs;
2733   while (count) {
2734     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2735     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2736     count--;
2737   }
2738   if (nsends) {
2739     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2740   }
2741 
2742   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2743 
2744   /* create redundant matrix */
2745   /*-------------------------*/
2746   if (reuse == MAT_INITIAL_MATRIX){
2747     /* compute rownz_max for preallocation */
2748     for (imdex=0; imdex<nrecvs; imdex++){
2749       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2750       rptr = rbuf_j[imdex];
2751       for (i=0; i<j; i++){
2752         ncols = rptr[i+1] - rptr[i];
2753         if (rownz_max < ncols) rownz_max = ncols;
2754       }
2755     }
2756 
2757     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2758     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2759     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2760     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2761     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2762     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2763   } else {
2764     C = *matredundant;
2765   }
2766 
2767   /* insert local matrix entries */
2768   rptr = sbuf_j;
2769   cols = sbuf_j + rend-rstart + 1;
2770   vals = sbuf_a;
2771   for (i=0; i<rend-rstart; i++){
2772     row   = i + rstart;
2773     ncols = rptr[i+1] - rptr[i];
2774     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2775     vals += ncols;
2776     cols += ncols;
2777   }
2778   /* insert received matrix entries */
2779   for (imdex=0; imdex<nrecvs; imdex++){
2780     rstart = rowrange[recv_rank[imdex]];
2781     rend   = rowrange[recv_rank[imdex]+1];
2782     rptr = rbuf_j[imdex];
2783     cols = rbuf_j[imdex] + rend-rstart + 1;
2784     vals = rbuf_a[imdex];
2785     for (i=0; i<rend-rstart; i++){
2786       row   = i + rstart;
2787       ncols = rptr[i+1] - rptr[i];
2788       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2789       vals += ncols;
2790       cols += ncols;
2791     }
2792   }
2793   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2794   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2795   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2796   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2797   if (reuse == MAT_INITIAL_MATRIX) {
2798     PetscContainer container;
2799     *matredundant = C;
2800     /* create a supporting struct and attach it to C for reuse */
2801     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2802     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2803     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2804     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2805     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2806     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2807 
2808     redund->nzlocal = nzlocal;
2809     redund->nsends  = nsends;
2810     redund->nrecvs  = nrecvs;
2811     redund->send_rank = send_rank;
2812     redund->recv_rank = recv_rank;
2813     redund->sbuf_nz = sbuf_nz;
2814     redund->rbuf_nz = rbuf_nz;
2815     redund->sbuf_j  = sbuf_j;
2816     redund->sbuf_a  = sbuf_a;
2817     redund->rbuf_j  = rbuf_j;
2818     redund->rbuf_a  = rbuf_a;
2819 
2820     redund->Destroy = C->ops->destroy;
2821     C->ops->destroy = MatDestroy_MatRedundant;
2822   }
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2828 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2829 {
2830   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2831   PetscErrorCode ierr;
2832   PetscInt       i,*idxb = 0;
2833   PetscScalar    *va,*vb;
2834   Vec            vtmp;
2835 
2836   PetscFunctionBegin;
2837   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2838   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2839   if (idx) {
2840     for (i=0; i<A->rmap->n; i++) {
2841       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2842     }
2843   }
2844 
2845   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2846   if (idx) {
2847     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2848   }
2849   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2850   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2851 
2852   for (i=0; i<A->rmap->n; i++){
2853     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2854       va[i] = vb[i];
2855       if (idx) idx[i] = a->garray[idxb[i]];
2856     }
2857   }
2858 
2859   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2860   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2861   ierr = PetscFree(idxb);CHKERRQ(ierr);
2862   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2868 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2869 {
2870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2871   PetscErrorCode ierr;
2872   PetscInt       i,*idxb = 0;
2873   PetscScalar    *va,*vb;
2874   Vec            vtmp;
2875 
2876   PetscFunctionBegin;
2877   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2878   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2879   if (idx) {
2880     for (i=0; i<A->cmap->n; i++) {
2881       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2882     }
2883   }
2884 
2885   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2886   if (idx) {
2887     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2888   }
2889   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2890   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2891 
2892   for (i=0; i<A->rmap->n; i++){
2893     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2894       va[i] = vb[i];
2895       if (idx) idx[i] = a->garray[idxb[i]];
2896     }
2897   }
2898 
2899   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2900   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2901   ierr = PetscFree(idxb);CHKERRQ(ierr);
2902   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2903   PetscFunctionReturn(0);
2904 }
2905 
2906 #undef __FUNCT__
2907 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2908 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2909 {
2910   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2911   PetscInt       n      = A->rmap->n;
2912   PetscInt       cstart = A->cmap->rstart;
2913   PetscInt      *cmap   = mat->garray;
2914   PetscInt      *diagIdx, *offdiagIdx;
2915   Vec            diagV, offdiagV;
2916   PetscScalar   *a, *diagA, *offdiagA;
2917   PetscInt       r;
2918   PetscErrorCode ierr;
2919 
2920   PetscFunctionBegin;
2921   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2922   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2923   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2924   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2925   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2926   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2927   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2928   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2929   for(r = 0; r < n; ++r) {
2930     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2931       a[r]   = diagA[r];
2932       idx[r] = cstart + diagIdx[r];
2933     } else {
2934       a[r]   = offdiagA[r];
2935       idx[r] = cmap[offdiagIdx[r]];
2936     }
2937   }
2938   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2939   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2940   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2941   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2942   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2943   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 #undef __FUNCT__
2948 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2949 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2950 {
2951   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2952   PetscInt       n      = A->rmap->n;
2953   PetscInt       cstart = A->cmap->rstart;
2954   PetscInt      *cmap   = mat->garray;
2955   PetscInt      *diagIdx, *offdiagIdx;
2956   Vec            diagV, offdiagV;
2957   PetscScalar   *a, *diagA, *offdiagA;
2958   PetscInt       r;
2959   PetscErrorCode ierr;
2960 
2961   PetscFunctionBegin;
2962   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2963   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2964   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2965   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2966   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2967   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2968   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2969   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2970   for(r = 0; r < n; ++r) {
2971     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2972       a[r]   = diagA[r];
2973       idx[r] = cstart + diagIdx[r];
2974     } else {
2975       a[r]   = offdiagA[r];
2976       idx[r] = cmap[offdiagIdx[r]];
2977     }
2978   }
2979   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2980   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2981   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2982   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2983   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2984   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 #undef __FUNCT__
2989 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2990 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2991 {
2992   PetscErrorCode ierr;
2993   Mat            *dummy;
2994 
2995   PetscFunctionBegin;
2996   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2997   *newmat = *dummy;
2998   ierr = PetscFree(dummy);CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
3001 
3002 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3003 
3004 #undef __FUNCT__
3005 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3006 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3007 {
3008   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3009   PetscErrorCode ierr;
3010 
3011   PetscFunctionBegin;
3012   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 
3017 /* -------------------------------------------------------------------*/
3018 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3019        MatGetRow_MPIAIJ,
3020        MatRestoreRow_MPIAIJ,
3021        MatMult_MPIAIJ,
3022 /* 4*/ MatMultAdd_MPIAIJ,
3023        MatMultTranspose_MPIAIJ,
3024        MatMultTransposeAdd_MPIAIJ,
3025 #ifdef PETSC_HAVE_PBGL
3026        MatSolve_MPIAIJ,
3027 #else
3028        0,
3029 #endif
3030        0,
3031        0,
3032 /*10*/ 0,
3033        0,
3034        0,
3035        MatSOR_MPIAIJ,
3036        MatTranspose_MPIAIJ,
3037 /*15*/ MatGetInfo_MPIAIJ,
3038        MatEqual_MPIAIJ,
3039        MatGetDiagonal_MPIAIJ,
3040        MatDiagonalScale_MPIAIJ,
3041        MatNorm_MPIAIJ,
3042 /*20*/ MatAssemblyBegin_MPIAIJ,
3043        MatAssemblyEnd_MPIAIJ,
3044        MatSetOption_MPIAIJ,
3045        MatZeroEntries_MPIAIJ,
3046 /*24*/ MatZeroRows_MPIAIJ,
3047        0,
3048 #ifdef PETSC_HAVE_PBGL
3049        0,
3050 #else
3051        0,
3052 #endif
3053        0,
3054        0,
3055 /*29*/ MatSetUp_MPIAIJ,
3056 #ifdef PETSC_HAVE_PBGL
3057        0,
3058 #else
3059        0,
3060 #endif
3061        0,
3062        0,
3063        0,
3064 /*34*/ MatDuplicate_MPIAIJ,
3065        0,
3066        0,
3067        0,
3068        0,
3069 /*39*/ MatAXPY_MPIAIJ,
3070        MatGetSubMatrices_MPIAIJ,
3071        MatIncreaseOverlap_MPIAIJ,
3072        MatGetValues_MPIAIJ,
3073        MatCopy_MPIAIJ,
3074 /*44*/ MatGetRowMax_MPIAIJ,
3075        MatScale_MPIAIJ,
3076        0,
3077        0,
3078        MatZeroRowsColumns_MPIAIJ,
3079 /*49*/ 0,
3080        0,
3081        0,
3082        0,
3083        0,
3084 /*54*/ MatFDColoringCreate_MPIAIJ,
3085        0,
3086        MatSetUnfactored_MPIAIJ,
3087        0, /* MatPermute_MPIAIJ, impl currently broken */
3088        0,
3089 /*59*/ MatGetSubMatrix_MPIAIJ,
3090        MatDestroy_MPIAIJ,
3091        MatView_MPIAIJ,
3092        0,
3093        0,
3094 /*64*/ 0,
3095        0,
3096        0,
3097        0,
3098        0,
3099 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3100        MatGetRowMinAbs_MPIAIJ,
3101        0,
3102        MatSetColoring_MPIAIJ,
3103 #if defined(PETSC_HAVE_ADIC)
3104        MatSetValuesAdic_MPIAIJ,
3105 #else
3106        0,
3107 #endif
3108        MatSetValuesAdifor_MPIAIJ,
3109 /*75*/ MatFDColoringApply_AIJ,
3110        0,
3111        0,
3112        0,
3113        0,
3114 /*80*/ 0,
3115        0,
3116        0,
3117 /*83*/ MatLoad_MPIAIJ,
3118        0,
3119        0,
3120        0,
3121        0,
3122        0,
3123 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3124        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3125        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3126        MatPtAP_Basic,
3127        MatPtAPSymbolic_MPIAIJ,
3128 /*94*/ MatPtAPNumeric_MPIAIJ,
3129        0,
3130        0,
3131        0,
3132        0,
3133 /*99*/ 0,
3134        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3135        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3136        MatConjugate_MPIAIJ,
3137        0,
3138 /*104*/MatSetValuesRow_MPIAIJ,
3139        MatRealPart_MPIAIJ,
3140        MatImaginaryPart_MPIAIJ,
3141        0,
3142        0,
3143 /*109*/0,
3144        MatGetRedundantMatrix_MPIAIJ,
3145        MatGetRowMin_MPIAIJ,
3146        0,
3147        0,
3148 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3149        0,
3150        0,
3151        0,
3152        0,
3153 /*119*/0,
3154        0,
3155        0,
3156        0,
3157        MatGetMultiProcBlock_MPIAIJ,
3158 /*124*/MatFindNonZeroRows_MPIAIJ,
3159        MatGetColumnNorms_MPIAIJ,
3160        MatInvertBlockDiagonal_MPIAIJ,
3161        0,
3162        MatGetSubMatricesParallel_MPIAIJ,
3163 /*129*/0,
3164        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3165        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3166        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3167        0,
3168 /*134*/0,
3169        0,
3170        0,
3171        0,
3172        0
3173 };
3174 
3175 /* ----------------------------------------------------------------------------------------*/
3176 
3177 EXTERN_C_BEGIN
3178 #undef __FUNCT__
3179 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3180 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3181 {
3182   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3183   PetscErrorCode ierr;
3184 
3185   PetscFunctionBegin;
3186   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3187   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190 EXTERN_C_END
3191 
3192 EXTERN_C_BEGIN
3193 #undef __FUNCT__
3194 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3195 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3196 {
3197   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3198   PetscErrorCode ierr;
3199 
3200   PetscFunctionBegin;
3201   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3202   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3203   PetscFunctionReturn(0);
3204 }
3205 EXTERN_C_END
3206 
3207 EXTERN_C_BEGIN
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3210 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3211 {
3212   Mat_MPIAIJ     *b;
3213   PetscErrorCode ierr;
3214   PetscInt       i;
3215   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3216 
3217   PetscFunctionBegin;
3218   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3219   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3220   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3221   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3222   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3223   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3224 
3225   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3226   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3227   if (d_nnz) {
3228     for (i=0; i<B->rmap->n; i++) {
3229       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3230     }
3231   }
3232   if (o_nnz) {
3233     for (i=0; i<B->rmap->n; i++) {
3234       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3235     }
3236   }
3237   b = (Mat_MPIAIJ*)B->data;
3238 
3239   if (!B->preallocated) {
3240     /* Explicitly create 2 MATSEQAIJ matrices. */
3241     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3242     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3243     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3244     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3245     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3246     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3247     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3248     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3249     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3250     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3251   }
3252 
3253   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3254   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3255   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3256   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3257   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3258   B->preallocated = PETSC_TRUE;
3259   PetscFunctionReturn(0);
3260 }
3261 EXTERN_C_END
3262 
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3265 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3266 {
3267   Mat            mat;
3268   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3269   PetscErrorCode ierr;
3270 
3271   PetscFunctionBegin;
3272   *newmat       = 0;
3273   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3274   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3275   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3276   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3277   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3278   a    = (Mat_MPIAIJ*)mat->data;
3279 
3280   mat->factortype    = matin->factortype;
3281   mat->rmap->bs      = matin->rmap->bs;
3282   mat->cmap->bs      = matin->cmap->bs;
3283   mat->assembled    = PETSC_TRUE;
3284   mat->insertmode   = NOT_SET_VALUES;
3285   mat->preallocated = PETSC_TRUE;
3286 
3287   a->size           = oldmat->size;
3288   a->rank           = oldmat->rank;
3289   a->donotstash     = oldmat->donotstash;
3290   a->roworiented    = oldmat->roworiented;
3291   a->rowindices     = 0;
3292   a->rowvalues      = 0;
3293   a->getrowactive   = PETSC_FALSE;
3294 
3295   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3296   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3297 
3298   if (oldmat->colmap) {
3299 #if defined (PETSC_USE_CTABLE)
3300     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3301 #else
3302     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3303     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3304     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3305 #endif
3306   } else a->colmap = 0;
3307   if (oldmat->garray) {
3308     PetscInt len;
3309     len  = oldmat->B->cmap->n;
3310     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3311     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3312     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3313   } else a->garray = 0;
3314 
3315   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3316   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3317   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3318   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3319   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3320   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3321   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3322   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3323   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3324   *newmat = mat;
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 
3329 
3330 #undef __FUNCT__
3331 #define __FUNCT__ "MatLoad_MPIAIJ"
3332 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3333 {
3334   PetscScalar    *vals,*svals;
3335   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3336   PetscErrorCode ierr;
3337   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3338   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3339   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3340   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3341   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3342   int            fd;
3343   PetscInt       bs = 1;
3344 
3345   PetscFunctionBegin;
3346   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3347   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3348   if (!rank) {
3349     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3350     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3351     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3352   }
3353 
3354   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3355   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3356   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3357 
3358   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3359 
3360   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3361   M = header[1]; N = header[2];
3362   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3363   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3364   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3365 
3366   /* If global sizes are set, check if they are consistent with that given in the file */
3367   if (sizesset) {
3368     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3369   }
3370   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3371   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3372 
3373   /* determine ownership of all (block) rows */
3374   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3375   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3376   else m = newMat->rmap->n; /* Set by user */
3377 
3378   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3379   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3380 
3381   /* First process needs enough room for process with most rows */
3382   if (!rank) {
3383     mmax       = rowners[1];
3384     for (i=2; i<size; i++) {
3385       mmax = PetscMax(mmax,rowners[i]);
3386     }
3387   } else mmax = m;
3388 
3389   rowners[0] = 0;
3390   for (i=2; i<=size; i++) {
3391     rowners[i] += rowners[i-1];
3392   }
3393   rstart = rowners[rank];
3394   rend   = rowners[rank+1];
3395 
3396   /* distribute row lengths to all processors */
3397   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3398   if (!rank) {
3399     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3400     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3401     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3402     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3403     for (j=0; j<m; j++) {
3404       procsnz[0] += ourlens[j];
3405     }
3406     for (i=1; i<size; i++) {
3407       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3408       /* calculate the number of nonzeros on each processor */
3409       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3410         procsnz[i] += rowlengths[j];
3411       }
3412       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3413     }
3414     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3415   } else {
3416     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3417   }
3418 
3419   if (!rank) {
3420     /* determine max buffer needed and allocate it */
3421     maxnz = 0;
3422     for (i=0; i<size; i++) {
3423       maxnz = PetscMax(maxnz,procsnz[i]);
3424     }
3425     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3426 
3427     /* read in my part of the matrix column indices  */
3428     nz   = procsnz[0];
3429     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3430     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3431 
3432     /* read in every one elses and ship off */
3433     for (i=1; i<size; i++) {
3434       nz     = procsnz[i];
3435       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3436       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3437     }
3438     ierr = PetscFree(cols);CHKERRQ(ierr);
3439   } else {
3440     /* determine buffer space needed for message */
3441     nz = 0;
3442     for (i=0; i<m; i++) {
3443       nz += ourlens[i];
3444     }
3445     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3446 
3447     /* receive message of column indices*/
3448     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3449   }
3450 
3451   /* determine column ownership if matrix is not square */
3452   if (N != M) {
3453     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3454     else n = newMat->cmap->n;
3455     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3456     cstart = cend - n;
3457   } else {
3458     cstart = rstart;
3459     cend   = rend;
3460     n      = cend - cstart;
3461   }
3462 
3463   /* loop over local rows, determining number of off diagonal entries */
3464   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3465   jj = 0;
3466   for (i=0; i<m; i++) {
3467     for (j=0; j<ourlens[i]; j++) {
3468       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3469       jj++;
3470     }
3471   }
3472 
3473   for (i=0; i<m; i++) {
3474     ourlens[i] -= offlens[i];
3475   }
3476   if (!sizesset) {
3477     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3478   }
3479 
3480   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3481 
3482   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3483 
3484   for (i=0; i<m; i++) {
3485     ourlens[i] += offlens[i];
3486   }
3487 
3488   if (!rank) {
3489     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3490 
3491     /* read in my part of the matrix numerical values  */
3492     nz   = procsnz[0];
3493     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3494 
3495     /* insert into matrix */
3496     jj      = rstart;
3497     smycols = mycols;
3498     svals   = vals;
3499     for (i=0; i<m; i++) {
3500       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3501       smycols += ourlens[i];
3502       svals   += ourlens[i];
3503       jj++;
3504     }
3505 
3506     /* read in other processors and ship out */
3507     for (i=1; i<size; i++) {
3508       nz     = procsnz[i];
3509       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3510       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3511     }
3512     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3513   } else {
3514     /* receive numeric values */
3515     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3516 
3517     /* receive message of values*/
3518     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3519 
3520     /* insert into matrix */
3521     jj      = rstart;
3522     smycols = mycols;
3523     svals   = vals;
3524     for (i=0; i<m; i++) {
3525       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3526       smycols += ourlens[i];
3527       svals   += ourlens[i];
3528       jj++;
3529     }
3530   }
3531   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3532   ierr = PetscFree(vals);CHKERRQ(ierr);
3533   ierr = PetscFree(mycols);CHKERRQ(ierr);
3534   ierr = PetscFree(rowners);CHKERRQ(ierr);
3535   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3536   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3537 
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3543 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3544 {
3545   PetscErrorCode ierr;
3546   IS             iscol_local;
3547   PetscInt       csize;
3548 
3549   PetscFunctionBegin;
3550   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3551   if (call == MAT_REUSE_MATRIX) {
3552     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3553     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3554   } else {
3555     PetscInt cbs;
3556     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3557     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3558     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3559   }
3560   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3561   if (call == MAT_INITIAL_MATRIX) {
3562     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3563     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3564   }
3565   PetscFunctionReturn(0);
3566 }
3567 
3568 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3569 #undef __FUNCT__
3570 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3571 /*
3572     Not great since it makes two copies of the submatrix, first an SeqAIJ
3573   in local and then by concatenating the local matrices the end result.
3574   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3575 
3576   Note: This requires a sequential iscol with all indices.
3577 */
3578 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3579 {
3580   PetscErrorCode ierr;
3581   PetscMPIInt    rank,size;
3582   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3583   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3584   PetscBool      allcolumns, colflag;
3585   Mat            M,Mreuse;
3586   MatScalar      *vwork,*aa;
3587   MPI_Comm       comm = ((PetscObject)mat)->comm;
3588   Mat_SeqAIJ     *aij;
3589 
3590 
3591   PetscFunctionBegin;
3592   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3593   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3594 
3595   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3596   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3597   if (colflag && ncol == mat->cmap->N){
3598     allcolumns = PETSC_TRUE;
3599   } else {
3600     allcolumns = PETSC_FALSE;
3601   }
3602   if (call ==  MAT_REUSE_MATRIX) {
3603     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3604     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3605     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3606   } else {
3607     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3608   }
3609 
3610   /*
3611       m - number of local rows
3612       n - number of columns (same on all processors)
3613       rstart - first row in new global matrix generated
3614   */
3615   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3616   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3617   if (call == MAT_INITIAL_MATRIX) {
3618     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3619     ii  = aij->i;
3620     jj  = aij->j;
3621 
3622     /*
3623         Determine the number of non-zeros in the diagonal and off-diagonal
3624         portions of the matrix in order to do correct preallocation
3625     */
3626 
3627     /* first get start and end of "diagonal" columns */
3628     if (csize == PETSC_DECIDE) {
3629       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3630       if (mglobal == n) { /* square matrix */
3631 	nlocal = m;
3632       } else {
3633         nlocal = n/size + ((n % size) > rank);
3634       }
3635     } else {
3636       nlocal = csize;
3637     }
3638     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3639     rstart = rend - nlocal;
3640     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3641 
3642     /* next, compute all the lengths */
3643     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3644     olens = dlens + m;
3645     for (i=0; i<m; i++) {
3646       jend = ii[i+1] - ii[i];
3647       olen = 0;
3648       dlen = 0;
3649       for (j=0; j<jend; j++) {
3650         if (*jj < rstart || *jj >= rend) olen++;
3651         else dlen++;
3652         jj++;
3653       }
3654       olens[i] = olen;
3655       dlens[i] = dlen;
3656     }
3657     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3658     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3659     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3660     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3661     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3662     ierr = PetscFree(dlens);CHKERRQ(ierr);
3663   } else {
3664     PetscInt ml,nl;
3665 
3666     M = *newmat;
3667     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3668     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3669     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3670     /*
3671          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3672        rather than the slower MatSetValues().
3673     */
3674     M->was_assembled = PETSC_TRUE;
3675     M->assembled     = PETSC_FALSE;
3676   }
3677   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3678   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3679   ii  = aij->i;
3680   jj  = aij->j;
3681   aa  = aij->a;
3682   for (i=0; i<m; i++) {
3683     row   = rstart + i;
3684     nz    = ii[i+1] - ii[i];
3685     cwork = jj;     jj += nz;
3686     vwork = aa;     aa += nz;
3687     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3688   }
3689 
3690   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   *newmat = M;
3693 
3694   /* save submatrix used in processor for next request */
3695   if (call ==  MAT_INITIAL_MATRIX) {
3696     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3697     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3698   }
3699 
3700   PetscFunctionReturn(0);
3701 }
3702 
3703 EXTERN_C_BEGIN
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3706 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3707 {
3708   PetscInt       m,cstart, cend,j,nnz,i,d;
3709   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3710   const PetscInt *JJ;
3711   PetscScalar    *values;
3712   PetscErrorCode ierr;
3713 
3714   PetscFunctionBegin;
3715   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3716 
3717   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3718   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3719   m      = B->rmap->n;
3720   cstart = B->cmap->rstart;
3721   cend   = B->cmap->rend;
3722   rstart = B->rmap->rstart;
3723 
3724   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3725 
3726 #if defined(PETSC_USE_DEBUGGING)
3727   for (i=0; i<m; i++) {
3728     nnz     = Ii[i+1]- Ii[i];
3729     JJ      = J + Ii[i];
3730     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3731     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3732     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3733   }
3734 #endif
3735 
3736   for (i=0; i<m; i++) {
3737     nnz     = Ii[i+1]- Ii[i];
3738     JJ      = J + Ii[i];
3739     nnz_max = PetscMax(nnz_max,nnz);
3740     d       = 0;
3741     for (j=0; j<nnz; j++) {
3742       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3743     }
3744     d_nnz[i] = d;
3745     o_nnz[i] = nnz - d;
3746   }
3747   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3748   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3749 
3750   if (v) values = (PetscScalar*)v;
3751   else {
3752     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3753     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3754   }
3755 
3756   for (i=0; i<m; i++) {
3757     ii   = i + rstart;
3758     nnz  = Ii[i+1]- Ii[i];
3759     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3760   }
3761   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763 
3764   if (!v) {
3765     ierr = PetscFree(values);CHKERRQ(ierr);
3766   }
3767   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3768   PetscFunctionReturn(0);
3769 }
3770 EXTERN_C_END
3771 
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3774 /*@
3775    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3776    (the default parallel PETSc format).
3777 
3778    Collective on MPI_Comm
3779 
3780    Input Parameters:
3781 +  B - the matrix
3782 .  i - the indices into j for the start of each local row (starts with zero)
3783 .  j - the column indices for each local row (starts with zero)
3784 -  v - optional values in the matrix
3785 
3786    Level: developer
3787 
3788    Notes:
3789        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3790      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3791      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3792 
3793        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3794 
3795        The format which is used for the sparse matrix input, is equivalent to a
3796     row-major ordering.. i.e for the following matrix, the input data expected is
3797     as shown:
3798 
3799         1 0 0
3800         2 0 3     P0
3801        -------
3802         4 5 6     P1
3803 
3804      Process0 [P0]: rows_owned=[0,1]
3805         i =  {0,1,3}  [size = nrow+1  = 2+1]
3806         j =  {0,0,2}  [size = nz = 6]
3807         v =  {1,2,3}  [size = nz = 6]
3808 
3809      Process1 [P1]: rows_owned=[2]
3810         i =  {0,3}    [size = nrow+1  = 1+1]
3811         j =  {0,1,2}  [size = nz = 6]
3812         v =  {4,5,6}  [size = nz = 6]
3813 
3814 .keywords: matrix, aij, compressed row, sparse, parallel
3815 
3816 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3817           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3818 @*/
3819 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3820 {
3821   PetscErrorCode ierr;
3822 
3823   PetscFunctionBegin;
3824   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3825   PetscFunctionReturn(0);
3826 }
3827 
3828 #undef __FUNCT__
3829 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3830 /*@C
3831    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3832    (the default parallel PETSc format).  For good matrix assembly performance
3833    the user should preallocate the matrix storage by setting the parameters
3834    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3835    performance can be increased by more than a factor of 50.
3836 
3837    Collective on MPI_Comm
3838 
3839    Input Parameters:
3840 +  A - the matrix
3841 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3842            (same value is used for all local rows)
3843 .  d_nnz - array containing the number of nonzeros in the various rows of the
3844            DIAGONAL portion of the local submatrix (possibly different for each row)
3845            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3846            The size of this array is equal to the number of local rows, i.e 'm'.
3847            For matrices that will be factored, you must leave room for (and set)
3848            the diagonal entry even if it is zero.
3849 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3850            submatrix (same value is used for all local rows).
3851 -  o_nnz - array containing the number of nonzeros in the various rows of the
3852            OFF-DIAGONAL portion of the local submatrix (possibly different for
3853            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3854            structure. The size of this array is equal to the number
3855            of local rows, i.e 'm'.
3856 
3857    If the *_nnz parameter is given then the *_nz parameter is ignored
3858 
3859    The AIJ format (also called the Yale sparse matrix format or
3860    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3861    storage.  The stored row and column indices begin with zero.
3862    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3863 
3864    The parallel matrix is partitioned such that the first m0 rows belong to
3865    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3866    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3867 
3868    The DIAGONAL portion of the local submatrix of a processor can be defined
3869    as the submatrix which is obtained by extraction the part corresponding to
3870    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3871    first row that belongs to the processor, r2 is the last row belonging to
3872    the this processor, and c1-c2 is range of indices of the local part of a
3873    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3874    common case of a square matrix, the row and column ranges are the same and
3875    the DIAGONAL part is also square. The remaining portion of the local
3876    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3877 
3878    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3879 
3880    You can call MatGetInfo() to get information on how effective the preallocation was;
3881    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3882    You can also run with the option -info and look for messages with the string
3883    malloc in them to see if additional memory allocation was needed.
3884 
3885    Example usage:
3886 
3887    Consider the following 8x8 matrix with 34 non-zero values, that is
3888    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3889    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3890    as follows:
3891 
3892 .vb
3893             1  2  0  |  0  3  0  |  0  4
3894     Proc0   0  5  6  |  7  0  0  |  8  0
3895             9  0 10  | 11  0  0  | 12  0
3896     -------------------------------------
3897            13  0 14  | 15 16 17  |  0  0
3898     Proc1   0 18  0  | 19 20 21  |  0  0
3899             0  0  0  | 22 23  0  | 24  0
3900     -------------------------------------
3901     Proc2  25 26 27  |  0  0 28  | 29  0
3902            30  0  0  | 31 32 33  |  0 34
3903 .ve
3904 
3905    This can be represented as a collection of submatrices as:
3906 
3907 .vb
3908       A B C
3909       D E F
3910       G H I
3911 .ve
3912 
3913    Where the submatrices A,B,C are owned by proc0, D,E,F are
3914    owned by proc1, G,H,I are owned by proc2.
3915 
3916    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3917    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3918    The 'M','N' parameters are 8,8, and have the same values on all procs.
3919 
3920    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3921    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3922    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3923    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3924    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3925    matrix, ans [DF] as another SeqAIJ matrix.
3926 
3927    When d_nz, o_nz parameters are specified, d_nz storage elements are
3928    allocated for every row of the local diagonal submatrix, and o_nz
3929    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3930    One way to choose d_nz and o_nz is to use the max nonzerors per local
3931    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3932    In this case, the values of d_nz,o_nz are:
3933 .vb
3934      proc0 : dnz = 2, o_nz = 2
3935      proc1 : dnz = 3, o_nz = 2
3936      proc2 : dnz = 1, o_nz = 4
3937 .ve
3938    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3939    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3940    for proc3. i.e we are using 12+15+10=37 storage locations to store
3941    34 values.
3942 
3943    When d_nnz, o_nnz parameters are specified, the storage is specified
3944    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3945    In the above case the values for d_nnz,o_nnz are:
3946 .vb
3947      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3948      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3949      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3950 .ve
3951    Here the space allocated is sum of all the above values i.e 34, and
3952    hence pre-allocation is perfect.
3953 
3954    Level: intermediate
3955 
3956 .keywords: matrix, aij, compressed row, sparse, parallel
3957 
3958 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3959           MPIAIJ, MatGetInfo()
3960 @*/
3961 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3962 {
3963   PetscErrorCode ierr;
3964 
3965   PetscFunctionBegin;
3966   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3967   PetscValidType(B,1);
3968   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3969   PetscFunctionReturn(0);
3970 }
3971 
3972 #undef __FUNCT__
3973 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3974 /*@
3975      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3976          CSR format the local rows.
3977 
3978    Collective on MPI_Comm
3979 
3980    Input Parameters:
3981 +  comm - MPI communicator
3982 .  m - number of local rows (Cannot be PETSC_DECIDE)
3983 .  n - This value should be the same as the local size used in creating the
3984        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3985        calculated if N is given) For square matrices n is almost always m.
3986 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3987 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3988 .   i - row indices
3989 .   j - column indices
3990 -   a - matrix values
3991 
3992    Output Parameter:
3993 .   mat - the matrix
3994 
3995    Level: intermediate
3996 
3997    Notes:
3998        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3999      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4000      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4001 
4002        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4003 
4004        The format which is used for the sparse matrix input, is equivalent to a
4005     row-major ordering.. i.e for the following matrix, the input data expected is
4006     as shown:
4007 
4008         1 0 0
4009         2 0 3     P0
4010        -------
4011         4 5 6     P1
4012 
4013      Process0 [P0]: rows_owned=[0,1]
4014         i =  {0,1,3}  [size = nrow+1  = 2+1]
4015         j =  {0,0,2}  [size = nz = 6]
4016         v =  {1,2,3}  [size = nz = 6]
4017 
4018      Process1 [P1]: rows_owned=[2]
4019         i =  {0,3}    [size = nrow+1  = 1+1]
4020         j =  {0,1,2}  [size = nz = 6]
4021         v =  {4,5,6}  [size = nz = 6]
4022 
4023 .keywords: matrix, aij, compressed row, sparse, parallel
4024 
4025 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4026           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4027 @*/
4028 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4029 {
4030   PetscErrorCode ierr;
4031 
4032   PetscFunctionBegin;
4033   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4034   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4035   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4036   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4037   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4038   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4039   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4040   PetscFunctionReturn(0);
4041 }
4042 
4043 #undef __FUNCT__
4044 #define __FUNCT__ "MatCreateAIJ"
4045 /*@C
4046    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4047    (the default parallel PETSc format).  For good matrix assembly performance
4048    the user should preallocate the matrix storage by setting the parameters
4049    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4050    performance can be increased by more than a factor of 50.
4051 
4052    Collective on MPI_Comm
4053 
4054    Input Parameters:
4055 +  comm - MPI communicator
4056 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4057            This value should be the same as the local size used in creating the
4058            y vector for the matrix-vector product y = Ax.
4059 .  n - This value should be the same as the local size used in creating the
4060        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4061        calculated if N is given) For square matrices n is almost always m.
4062 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4063 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4064 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4065            (same value is used for all local rows)
4066 .  d_nnz - array containing the number of nonzeros in the various rows of the
4067            DIAGONAL portion of the local submatrix (possibly different for each row)
4068            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4069            The size of this array is equal to the number of local rows, i.e 'm'.
4070 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4071            submatrix (same value is used for all local rows).
4072 -  o_nnz - array containing the number of nonzeros in the various rows of the
4073            OFF-DIAGONAL portion of the local submatrix (possibly different for
4074            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4075            structure. The size of this array is equal to the number
4076            of local rows, i.e 'm'.
4077 
4078    Output Parameter:
4079 .  A - the matrix
4080 
4081    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4082    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4083    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4084 
4085    Notes:
4086    If the *_nnz parameter is given then the *_nz parameter is ignored
4087 
4088    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4089    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4090    storage requirements for this matrix.
4091 
4092    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4093    processor than it must be used on all processors that share the object for
4094    that argument.
4095 
4096    The user MUST specify either the local or global matrix dimensions
4097    (possibly both).
4098 
4099    The parallel matrix is partitioned across processors such that the
4100    first m0 rows belong to process 0, the next m1 rows belong to
4101    process 1, the next m2 rows belong to process 2 etc.. where
4102    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4103    values corresponding to [m x N] submatrix.
4104 
4105    The columns are logically partitioned with the n0 columns belonging
4106    to 0th partition, the next n1 columns belonging to the next
4107    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4108 
4109    The DIAGONAL portion of the local submatrix on any given processor
4110    is the submatrix corresponding to the rows and columns m,n
4111    corresponding to the given processor. i.e diagonal matrix on
4112    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4113    etc. The remaining portion of the local submatrix [m x (N-n)]
4114    constitute the OFF-DIAGONAL portion. The example below better
4115    illustrates this concept.
4116 
4117    For a square global matrix we define each processor's diagonal portion
4118    to be its local rows and the corresponding columns (a square submatrix);
4119    each processor's off-diagonal portion encompasses the remainder of the
4120    local matrix (a rectangular submatrix).
4121 
4122    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4123 
4124    When calling this routine with a single process communicator, a matrix of
4125    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4126    type of communicator, use the construction mechanism:
4127      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4128 
4129    By default, this format uses inodes (identical nodes) when possible.
4130    We search for consecutive rows with the same nonzero structure, thereby
4131    reusing matrix information to achieve increased efficiency.
4132 
4133    Options Database Keys:
4134 +  -mat_no_inode  - Do not use inodes
4135 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4136 -  -mat_aij_oneindex - Internally use indexing starting at 1
4137         rather than 0.  Note that when calling MatSetValues(),
4138         the user still MUST index entries starting at 0!
4139 
4140 
4141    Example usage:
4142 
4143    Consider the following 8x8 matrix with 34 non-zero values, that is
4144    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4145    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4146    as follows:
4147 
4148 .vb
4149             1  2  0  |  0  3  0  |  0  4
4150     Proc0   0  5  6  |  7  0  0  |  8  0
4151             9  0 10  | 11  0  0  | 12  0
4152     -------------------------------------
4153            13  0 14  | 15 16 17  |  0  0
4154     Proc1   0 18  0  | 19 20 21  |  0  0
4155             0  0  0  | 22 23  0  | 24  0
4156     -------------------------------------
4157     Proc2  25 26 27  |  0  0 28  | 29  0
4158            30  0  0  | 31 32 33  |  0 34
4159 .ve
4160 
4161    This can be represented as a collection of submatrices as:
4162 
4163 .vb
4164       A B C
4165       D E F
4166       G H I
4167 .ve
4168 
4169    Where the submatrices A,B,C are owned by proc0, D,E,F are
4170    owned by proc1, G,H,I are owned by proc2.
4171 
4172    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4173    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4174    The 'M','N' parameters are 8,8, and have the same values on all procs.
4175 
4176    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4177    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4178    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4179    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4180    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4181    matrix, ans [DF] as another SeqAIJ matrix.
4182 
4183    When d_nz, o_nz parameters are specified, d_nz storage elements are
4184    allocated for every row of the local diagonal submatrix, and o_nz
4185    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4186    One way to choose d_nz and o_nz is to use the max nonzerors per local
4187    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4188    In this case, the values of d_nz,o_nz are:
4189 .vb
4190      proc0 : dnz = 2, o_nz = 2
4191      proc1 : dnz = 3, o_nz = 2
4192      proc2 : dnz = 1, o_nz = 4
4193 .ve
4194    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4195    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4196    for proc3. i.e we are using 12+15+10=37 storage locations to store
4197    34 values.
4198 
4199    When d_nnz, o_nnz parameters are specified, the storage is specified
4200    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4201    In the above case the values for d_nnz,o_nnz are:
4202 .vb
4203      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4204      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4205      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4206 .ve
4207    Here the space allocated is sum of all the above values i.e 34, and
4208    hence pre-allocation is perfect.
4209 
4210    Level: intermediate
4211 
4212 .keywords: matrix, aij, compressed row, sparse, parallel
4213 
4214 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4215           MPIAIJ, MatCreateMPIAIJWithArrays()
4216 @*/
4217 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4218 {
4219   PetscErrorCode ierr;
4220   PetscMPIInt    size;
4221 
4222   PetscFunctionBegin;
4223   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4224   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4225   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4226   if (size > 1) {
4227     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4228     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4229   } else {
4230     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4231     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4232   }
4233   PetscFunctionReturn(0);
4234 }
4235 
4236 #undef __FUNCT__
4237 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4238 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4239 {
4240   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4241 
4242   PetscFunctionBegin;
4243   *Ad     = a->A;
4244   *Ao     = a->B;
4245   *colmap = a->garray;
4246   PetscFunctionReturn(0);
4247 }
4248 
4249 #undef __FUNCT__
4250 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4251 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4252 {
4253   PetscErrorCode ierr;
4254   PetscInt       i;
4255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4256 
4257   PetscFunctionBegin;
4258   if (coloring->ctype == IS_COLORING_GLOBAL) {
4259     ISColoringValue *allcolors,*colors;
4260     ISColoring      ocoloring;
4261 
4262     /* set coloring for diagonal portion */
4263     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4264 
4265     /* set coloring for off-diagonal portion */
4266     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4267     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4268     for (i=0; i<a->B->cmap->n; i++) {
4269       colors[i] = allcolors[a->garray[i]];
4270     }
4271     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4272     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4273     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4274     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4275   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4276     ISColoringValue *colors;
4277     PetscInt        *larray;
4278     ISColoring      ocoloring;
4279 
4280     /* set coloring for diagonal portion */
4281     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4282     for (i=0; i<a->A->cmap->n; i++) {
4283       larray[i] = i + A->cmap->rstart;
4284     }
4285     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4286     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4287     for (i=0; i<a->A->cmap->n; i++) {
4288       colors[i] = coloring->colors[larray[i]];
4289     }
4290     ierr = PetscFree(larray);CHKERRQ(ierr);
4291     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4292     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4293     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4294 
4295     /* set coloring for off-diagonal portion */
4296     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4297     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4298     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4299     for (i=0; i<a->B->cmap->n; i++) {
4300       colors[i] = coloring->colors[larray[i]];
4301     }
4302     ierr = PetscFree(larray);CHKERRQ(ierr);
4303     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4304     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4305     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4306   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4307 
4308   PetscFunctionReturn(0);
4309 }
4310 
4311 #if defined(PETSC_HAVE_ADIC)
4312 #undef __FUNCT__
4313 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4314 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4315 {
4316   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4317   PetscErrorCode ierr;
4318 
4319   PetscFunctionBegin;
4320   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4321   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4322   PetscFunctionReturn(0);
4323 }
4324 #endif
4325 
4326 #undef __FUNCT__
4327 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4328 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4329 {
4330   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4331   PetscErrorCode ierr;
4332 
4333   PetscFunctionBegin;
4334   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4335   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4336   PetscFunctionReturn(0);
4337 }
4338 
4339 #undef __FUNCT__
4340 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4341 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4342 {
4343   PetscErrorCode ierr;
4344   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4345   PetscInt       *indx;
4346 
4347   PetscFunctionBegin;
4348   /* This routine will ONLY return MPIAIJ type matrix */
4349   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4350   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4351   if (n == PETSC_DECIDE){
4352     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4353   }
4354   /* Check sum(n) = N */
4355   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4356   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4357 
4358   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4359   rstart -= m;
4360 
4361   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4362   for (i=0;i<m;i++) {
4363     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4364     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4365     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4366   }
4367 
4368   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4369   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4370   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4371   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4372   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4373   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4374   PetscFunctionReturn(0);
4375 }
4376 
4377 #undef __FUNCT__
4378 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4379 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4380 {
4381   PetscErrorCode ierr;
4382   PetscInt       m,N,i,rstart,nnz,Ii;
4383   PetscInt       *indx;
4384   PetscScalar    *values;
4385 
4386   PetscFunctionBegin;
4387   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4388   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4389   for (i=0;i<m;i++) {
4390     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4391     Ii    = i + rstart;
4392     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4393     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4394   }
4395   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4396   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4397   PetscFunctionReturn(0);
4398 }
4399 
4400 #undef __FUNCT__
4401 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4402 /*@
4403       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4404                  matrices from each processor
4405 
4406     Collective on MPI_Comm
4407 
4408    Input Parameters:
4409 +    comm - the communicators the parallel matrix will live on
4410 .    inmat - the input sequential matrices
4411 .    n - number of local columns (or PETSC_DECIDE)
4412 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4413 
4414    Output Parameter:
4415 .    outmat - the parallel matrix generated
4416 
4417     Level: advanced
4418 
4419    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4420 
4421 @*/
4422 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4423 {
4424   PetscErrorCode ierr;
4425 
4426   PetscFunctionBegin;
4427   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4428   if (scall == MAT_INITIAL_MATRIX){
4429     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4430   }
4431   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4432   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #undef __FUNCT__
4437 #define __FUNCT__ "MatFileSplit"
4438 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4439 {
4440   PetscErrorCode    ierr;
4441   PetscMPIInt       rank;
4442   PetscInt          m,N,i,rstart,nnz;
4443   size_t            len;
4444   const PetscInt    *indx;
4445   PetscViewer       out;
4446   char              *name;
4447   Mat               B;
4448   const PetscScalar *values;
4449 
4450   PetscFunctionBegin;
4451   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4452   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4453   /* Should this be the type of the diagonal block of A? */
4454   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4455   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4456   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4457   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4458   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4459   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4460   for (i=0;i<m;i++) {
4461     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4462     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4463     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4464   }
4465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4467 
4468   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4469   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4470   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4471   sprintf(name,"%s.%d",outfile,rank);
4472   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4473   ierr = PetscFree(name);
4474   ierr = MatView(B,out);CHKERRQ(ierr);
4475   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4476   ierr = MatDestroy(&B);CHKERRQ(ierr);
4477   PetscFunctionReturn(0);
4478 }
4479 
4480 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4481 #undef __FUNCT__
4482 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4483 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4484 {
4485   PetscErrorCode       ierr;
4486   Mat_Merge_SeqsToMPI  *merge;
4487   PetscContainer       container;
4488 
4489   PetscFunctionBegin;
4490   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4491   if (container) {
4492     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4505     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4506     ierr = PetscFree(merge);CHKERRQ(ierr);
4507     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4508   }
4509   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #include <../src/mat/utils/freespace.h>
4514 #include <petscbt.h>
4515 
4516 #undef __FUNCT__
4517 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4518 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4519 {
4520   PetscErrorCode       ierr;
4521   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4522   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4523   PetscMPIInt          size,rank,taga,*len_s;
4524   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4525   PetscInt             proc,m;
4526   PetscInt             **buf_ri,**buf_rj;
4527   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4528   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4529   MPI_Request          *s_waits,*r_waits;
4530   MPI_Status           *status;
4531   MatScalar            *aa=a->a;
4532   MatScalar            **abuf_r,*ba_i;
4533   Mat_Merge_SeqsToMPI  *merge;
4534   PetscContainer       container;
4535 
4536   PetscFunctionBegin;
4537   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4538 
4539   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4540   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4541 
4542   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4543   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4544 
4545   bi     = merge->bi;
4546   bj     = merge->bj;
4547   buf_ri = merge->buf_ri;
4548   buf_rj = merge->buf_rj;
4549 
4550   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4551   owners = merge->rowmap->range;
4552   len_s  = merge->len_s;
4553 
4554   /* send and recv matrix values */
4555   /*-----------------------------*/
4556   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4557   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4558 
4559   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4560   for (proc=0,k=0; proc<size; proc++){
4561     if (!len_s[proc]) continue;
4562     i = owners[proc];
4563     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4564     k++;
4565   }
4566 
4567   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4568   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4569   ierr = PetscFree(status);CHKERRQ(ierr);
4570 
4571   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4572   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4573 
4574   /* insert mat values of mpimat */
4575   /*----------------------------*/
4576   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4577   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4578 
4579   for (k=0; k<merge->nrecv; k++){
4580     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4581     nrows = *(buf_ri_k[k]);
4582     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4583     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4584   }
4585 
4586   /* set values of ba */
4587   m = merge->rowmap->n;
4588   for (i=0; i<m; i++) {
4589     arow = owners[rank] + i;
4590     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4591     bnzi = bi[i+1] - bi[i];
4592     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4593 
4594     /* add local non-zero vals of this proc's seqmat into ba */
4595     anzi = ai[arow+1] - ai[arow];
4596     aj   = a->j + ai[arow];
4597     aa   = a->a + ai[arow];
4598     nextaj = 0;
4599     for (j=0; nextaj<anzi; j++){
4600       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4601         ba_i[j] += aa[nextaj++];
4602       }
4603     }
4604 
4605     /* add received vals into ba */
4606     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4607       /* i-th row */
4608       if (i == *nextrow[k]) {
4609         anzi = *(nextai[k]+1) - *nextai[k];
4610         aj   = buf_rj[k] + *(nextai[k]);
4611         aa   = abuf_r[k] + *(nextai[k]);
4612         nextaj = 0;
4613         for (j=0; nextaj<anzi; j++){
4614           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4615             ba_i[j] += aa[nextaj++];
4616           }
4617         }
4618         nextrow[k]++; nextai[k]++;
4619       }
4620     }
4621     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4622   }
4623   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4624   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4625 
4626   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4627   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4628   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4629   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4630   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4631   PetscFunctionReturn(0);
4632 }
4633 
4634 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4635 
4636 #undef __FUNCT__
4637 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4638 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4639 {
4640   PetscErrorCode       ierr;
4641   Mat                  B_mpi;
4642   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4643   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4644   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4645   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4646   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4647   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4648   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4649   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4650   MPI_Status           *status;
4651   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4652   PetscBT              lnkbt;
4653   Mat_Merge_SeqsToMPI  *merge;
4654   PetscContainer       container;
4655 
4656   PetscFunctionBegin;
4657   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4658 
4659   /* make sure it is a PETSc comm */
4660   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4662   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4663 
4664   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4665   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4666 
4667   /* determine row ownership */
4668   /*---------------------------------------------------------*/
4669   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4670   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4671   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4672   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4673   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4674   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4675   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4676 
4677   m      = merge->rowmap->n;
4678   M      = merge->rowmap->N;
4679   owners = merge->rowmap->range;
4680 
4681   /* determine the number of messages to send, their lengths */
4682   /*---------------------------------------------------------*/
4683   len_s  = merge->len_s;
4684 
4685   len = 0;  /* length of buf_si[] */
4686   merge->nsend = 0;
4687   for (proc=0; proc<size; proc++){
4688     len_si[proc] = 0;
4689     if (proc == rank){
4690       len_s[proc] = 0;
4691     } else {
4692       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4693       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4694     }
4695     if (len_s[proc]) {
4696       merge->nsend++;
4697       nrows = 0;
4698       for (i=owners[proc]; i<owners[proc+1]; i++){
4699         if (ai[i+1] > ai[i]) nrows++;
4700       }
4701       len_si[proc] = 2*(nrows+1);
4702       len += len_si[proc];
4703     }
4704   }
4705 
4706   /* determine the number and length of messages to receive for ij-structure */
4707   /*-------------------------------------------------------------------------*/
4708   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4709   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4710 
4711   /* post the Irecv of j-structure */
4712   /*-------------------------------*/
4713   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4714   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4715 
4716   /* post the Isend of j-structure */
4717   /*--------------------------------*/
4718   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4719 
4720   for (proc=0, k=0; proc<size; proc++){
4721     if (!len_s[proc]) continue;
4722     i = owners[proc];
4723     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4724     k++;
4725   }
4726 
4727   /* receives and sends of j-structure are complete */
4728   /*------------------------------------------------*/
4729   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4730   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4731 
4732   /* send and recv i-structure */
4733   /*---------------------------*/
4734   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4735   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4736 
4737   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4738   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4739   for (proc=0,k=0; proc<size; proc++){
4740     if (!len_s[proc]) continue;
4741     /* form outgoing message for i-structure:
4742          buf_si[0]:                 nrows to be sent
4743                [1:nrows]:           row index (global)
4744                [nrows+1:2*nrows+1]: i-structure index
4745     */
4746     /*-------------------------------------------*/
4747     nrows = len_si[proc]/2 - 1;
4748     buf_si_i    = buf_si + nrows+1;
4749     buf_si[0]   = nrows;
4750     buf_si_i[0] = 0;
4751     nrows = 0;
4752     for (i=owners[proc]; i<owners[proc+1]; i++){
4753       anzi = ai[i+1] - ai[i];
4754       if (anzi) {
4755         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4756         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4757         nrows++;
4758       }
4759     }
4760     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4761     k++;
4762     buf_si += len_si[proc];
4763   }
4764 
4765   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4766   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4767 
4768   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4769   for (i=0; i<merge->nrecv; i++){
4770     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4771   }
4772 
4773   ierr = PetscFree(len_si);CHKERRQ(ierr);
4774   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4775   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4776   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4777   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4778   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4779   ierr = PetscFree(status);CHKERRQ(ierr);
4780 
4781   /* compute a local seq matrix in each processor */
4782   /*----------------------------------------------*/
4783   /* allocate bi array and free space for accumulating nonzero column info */
4784   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4785   bi[0] = 0;
4786 
4787   /* create and initialize a linked list */
4788   nlnk = N+1;
4789   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4790 
4791   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4792   len = 0;
4793   len  = ai[owners[rank+1]] - ai[owners[rank]];
4794   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4795   current_space = free_space;
4796 
4797   /* determine symbolic info for each local row */
4798   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4799 
4800   for (k=0; k<merge->nrecv; k++){
4801     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4802     nrows = *buf_ri_k[k];
4803     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4804     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4805   }
4806 
4807   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4808   len = 0;
4809   for (i=0;i<m;i++) {
4810     bnzi   = 0;
4811     /* add local non-zero cols of this proc's seqmat into lnk */
4812     arow   = owners[rank] + i;
4813     anzi   = ai[arow+1] - ai[arow];
4814     aj     = a->j + ai[arow];
4815     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4816     bnzi += nlnk;
4817     /* add received col data into lnk */
4818     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4819       if (i == *nextrow[k]) { /* i-th row */
4820         anzi = *(nextai[k]+1) - *nextai[k];
4821         aj   = buf_rj[k] + *nextai[k];
4822         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4823         bnzi += nlnk;
4824         nextrow[k]++; nextai[k]++;
4825       }
4826     }
4827     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4828 
4829     /* if free space is not available, make more free space */
4830     if (current_space->local_remaining<bnzi) {
4831       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4832       nspacedouble++;
4833     }
4834     /* copy data into free space, then initialize lnk */
4835     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4836     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4837 
4838     current_space->array           += bnzi;
4839     current_space->local_used      += bnzi;
4840     current_space->local_remaining -= bnzi;
4841 
4842     bi[i+1] = bi[i] + bnzi;
4843   }
4844 
4845   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4846 
4847   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4848   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4849   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4850 
4851   /* create symbolic parallel matrix B_mpi */
4852   /*---------------------------------------*/
4853     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4854   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4855   if (n==PETSC_DECIDE) {
4856     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4857   } else {
4858     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4859   }
4860   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4861   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4862   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4863   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4864   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4865 
4866   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4867   B_mpi->assembled     = PETSC_FALSE;
4868   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4869   merge->bi            = bi;
4870   merge->bj            = bj;
4871   merge->buf_ri        = buf_ri;
4872   merge->buf_rj        = buf_rj;
4873   merge->coi           = PETSC_NULL;
4874   merge->coj           = PETSC_NULL;
4875   merge->owners_co     = PETSC_NULL;
4876 
4877   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4878 
4879   /* attach the supporting struct to B_mpi for reuse */
4880   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4881   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4882   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4883   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4884   *mpimat = B_mpi;
4885 
4886   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4887   PetscFunctionReturn(0);
4888 }
4889 
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4892 /*@C
4893       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4894                  matrices from each processor
4895 
4896     Collective on MPI_Comm
4897 
4898    Input Parameters:
4899 +    comm - the communicators the parallel matrix will live on
4900 .    seqmat - the input sequential matrices
4901 .    m - number of local rows (or PETSC_DECIDE)
4902 .    n - number of local columns (or PETSC_DECIDE)
4903 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4904 
4905    Output Parameter:
4906 .    mpimat - the parallel matrix generated
4907 
4908     Level: advanced
4909 
4910    Notes:
4911      The dimensions of the sequential matrix in each processor MUST be the same.
4912      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4913      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4914 @*/
4915 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4916 {
4917   PetscErrorCode   ierr;
4918   PetscMPIInt     size;
4919 
4920   PetscFunctionBegin;
4921   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4922   if (size == 1){
4923      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924      if (scall == MAT_INITIAL_MATRIX){
4925        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4926      } else {
4927        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4928      }
4929      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930      PetscFunctionReturn(0);
4931   }
4932   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4933   if (scall == MAT_INITIAL_MATRIX){
4934     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4935   }
4936   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4937   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4943 /*@
4944      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4945           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4946           with MatGetSize()
4947 
4948     Not Collective
4949 
4950    Input Parameters:
4951 +    A - the matrix
4952 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4953 
4954    Output Parameter:
4955 .    A_loc - the local sequential matrix generated
4956 
4957     Level: developer
4958 
4959 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4960 
4961 @*/
4962 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4963 {
4964   PetscErrorCode  ierr;
4965   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4966   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4967   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4968   MatScalar       *aa=a->a,*ba=b->a,*cam;
4969   PetscScalar     *ca;
4970   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4971   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4972   PetscBool       match;
4973 
4974   PetscFunctionBegin;
4975   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4976   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4977   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4978   if (scall == MAT_INITIAL_MATRIX){
4979     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4980     ci[0] = 0;
4981     for (i=0; i<am; i++){
4982       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4983     }
4984     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4985     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4986     k = 0;
4987     for (i=0; i<am; i++) {
4988       ncols_o = bi[i+1] - bi[i];
4989       ncols_d = ai[i+1] - ai[i];
4990       /* off-diagonal portion of A */
4991       for (jo=0; jo<ncols_o; jo++) {
4992         col = cmap[*bj];
4993         if (col >= cstart) break;
4994         cj[k]   = col; bj++;
4995         ca[k++] = *ba++;
4996       }
4997       /* diagonal portion of A */
4998       for (j=0; j<ncols_d; j++) {
4999         cj[k]   = cstart + *aj++;
5000         ca[k++] = *aa++;
5001       }
5002       /* off-diagonal portion of A */
5003       for (j=jo; j<ncols_o; j++) {
5004         cj[k]   = cmap[*bj++];
5005         ca[k++] = *ba++;
5006       }
5007     }
5008     /* put together the new matrix */
5009     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5010     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5011     /* Since these are PETSc arrays, change flags to free them as necessary. */
5012     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5013     mat->free_a  = PETSC_TRUE;
5014     mat->free_ij = PETSC_TRUE;
5015     mat->nonew   = 0;
5016   } else if (scall == MAT_REUSE_MATRIX){
5017     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5018     ci = mat->i; cj = mat->j; cam = mat->a;
5019     for (i=0; i<am; i++) {
5020       /* off-diagonal portion of A */
5021       ncols_o = bi[i+1] - bi[i];
5022       for (jo=0; jo<ncols_o; jo++) {
5023         col = cmap[*bj];
5024         if (col >= cstart) break;
5025         *cam++ = *ba++; bj++;
5026       }
5027       /* diagonal portion of A */
5028       ncols_d = ai[i+1] - ai[i];
5029       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5030       /* off-diagonal portion of A */
5031       for (j=jo; j<ncols_o; j++) {
5032         *cam++ = *ba++; bj++;
5033       }
5034     }
5035   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5036   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5037   PetscFunctionReturn(0);
5038 }
5039 
5040 #undef __FUNCT__
5041 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5042 /*@C
5043      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5044 
5045     Not Collective
5046 
5047    Input Parameters:
5048 +    A - the matrix
5049 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5050 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5051 
5052    Output Parameter:
5053 .    A_loc - the local sequential matrix generated
5054 
5055     Level: developer
5056 
5057 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5058 
5059 @*/
5060 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5061 {
5062   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5063   PetscErrorCode    ierr;
5064   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5065   IS                isrowa,iscola;
5066   Mat               *aloc;
5067   PetscBool       match;
5068 
5069   PetscFunctionBegin;
5070   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5071   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5072   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5073   if (!row){
5074     start = A->rmap->rstart; end = A->rmap->rend;
5075     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5076   } else {
5077     isrowa = *row;
5078   }
5079   if (!col){
5080     start = A->cmap->rstart;
5081     cmap  = a->garray;
5082     nzA   = a->A->cmap->n;
5083     nzB   = a->B->cmap->n;
5084     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5085     ncols = 0;
5086     for (i=0; i<nzB; i++) {
5087       if (cmap[i] < start) idx[ncols++] = cmap[i];
5088       else break;
5089     }
5090     imark = i;
5091     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5092     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5093     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5094   } else {
5095     iscola = *col;
5096   }
5097   if (scall != MAT_INITIAL_MATRIX){
5098     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5099     aloc[0] = *A_loc;
5100   }
5101   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5102   *A_loc = aloc[0];
5103   ierr = PetscFree(aloc);CHKERRQ(ierr);
5104   if (!row){
5105     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5106   }
5107   if (!col){
5108     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5109   }
5110   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5111   PetscFunctionReturn(0);
5112 }
5113 
5114 #undef __FUNCT__
5115 #define __FUNCT__ "MatGetBrowsOfAcols"
5116 /*@C
5117     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5118 
5119     Collective on Mat
5120 
5121    Input Parameters:
5122 +    A,B - the matrices in mpiaij format
5123 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5124 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5125 
5126    Output Parameter:
5127 +    rowb, colb - index sets of rows and columns of B to extract
5128 -    B_seq - the sequential matrix generated
5129 
5130     Level: developer
5131 
5132 @*/
5133 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5134 {
5135   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5136   PetscErrorCode    ierr;
5137   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5138   IS                isrowb,iscolb;
5139   Mat               *bseq=PETSC_NULL;
5140 
5141   PetscFunctionBegin;
5142   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5143     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5144   }
5145   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5146 
5147   if (scall == MAT_INITIAL_MATRIX){
5148     start = A->cmap->rstart;
5149     cmap  = a->garray;
5150     nzA   = a->A->cmap->n;
5151     nzB   = a->B->cmap->n;
5152     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5153     ncols = 0;
5154     for (i=0; i<nzB; i++) {  /* row < local row index */
5155       if (cmap[i] < start) idx[ncols++] = cmap[i];
5156       else break;
5157     }
5158     imark = i;
5159     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5160     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5161     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5162     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5163   } else {
5164     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5165     isrowb = *rowb; iscolb = *colb;
5166     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5167     bseq[0] = *B_seq;
5168   }
5169   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5170   *B_seq = bseq[0];
5171   ierr = PetscFree(bseq);CHKERRQ(ierr);
5172   if (!rowb){
5173     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5174   } else {
5175     *rowb = isrowb;
5176   }
5177   if (!colb){
5178     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5179   } else {
5180     *colb = iscolb;
5181   }
5182   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5183   PetscFunctionReturn(0);
5184 }
5185 
5186 #undef __FUNCT__
5187 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5188 /*
5189     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5190     of the OFF-DIAGONAL portion of local A
5191 
5192     Collective on Mat
5193 
5194    Input Parameters:
5195 +    A,B - the matrices in mpiaij format
5196 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5197 
5198    Output Parameter:
5199 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5200 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5201 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5202 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5203 
5204     Level: developer
5205 
5206 */
5207 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5208 {
5209   VecScatter_MPI_General *gen_to,*gen_from;
5210   PetscErrorCode         ierr;
5211   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5212   Mat_SeqAIJ             *b_oth;
5213   VecScatter             ctx=a->Mvctx;
5214   MPI_Comm               comm=((PetscObject)ctx)->comm;
5215   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5216   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5217   PetscScalar            *rvalues,*svalues;
5218   MatScalar              *b_otha,*bufa,*bufA;
5219   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5220   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5221   MPI_Status             *sstatus,rstatus;
5222   PetscMPIInt            jj;
5223   PetscInt               *cols,sbs,rbs;
5224   PetscScalar            *vals;
5225 
5226   PetscFunctionBegin;
5227   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5228     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5229   }
5230   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5231   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5232 
5233   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5234   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5235   rvalues  = gen_from->values; /* holds the length of receiving row */
5236   svalues  = gen_to->values;   /* holds the length of sending row */
5237   nrecvs   = gen_from->n;
5238   nsends   = gen_to->n;
5239 
5240   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5241   srow     = gen_to->indices;   /* local row index to be sent */
5242   sstarts  = gen_to->starts;
5243   sprocs   = gen_to->procs;
5244   sstatus  = gen_to->sstatus;
5245   sbs      = gen_to->bs;
5246   rstarts  = gen_from->starts;
5247   rprocs   = gen_from->procs;
5248   rbs      = gen_from->bs;
5249 
5250   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5251   if (scall == MAT_INITIAL_MATRIX){
5252     /* i-array */
5253     /*---------*/
5254     /*  post receives */
5255     for (i=0; i<nrecvs; i++){
5256       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5257       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5258       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5259     }
5260 
5261     /* pack the outgoing message */
5262     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5263     sstartsj[0] = 0;  rstartsj[0] = 0;
5264     len = 0; /* total length of j or a array to be sent */
5265     k = 0;
5266     for (i=0; i<nsends; i++){
5267       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5268       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5269       for (j=0; j<nrows; j++) {
5270         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5271         for (l=0; l<sbs; l++){
5272           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5273           rowlen[j*sbs+l] = ncols;
5274           len += ncols;
5275           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5276         }
5277         k++;
5278       }
5279       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5280       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5281     }
5282     /* recvs and sends of i-array are completed */
5283     i = nrecvs;
5284     while (i--) {
5285       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5286     }
5287     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5288 
5289     /* allocate buffers for sending j and a arrays */
5290     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5291     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5292 
5293     /* create i-array of B_oth */
5294     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5295     b_othi[0] = 0;
5296     len = 0; /* total length of j or a array to be received */
5297     k = 0;
5298     for (i=0; i<nrecvs; i++){
5299       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5300       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5301       for (j=0; j<nrows; j++) {
5302         b_othi[k+1] = b_othi[k] + rowlen[j];
5303         len += rowlen[j]; k++;
5304       }
5305       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5306     }
5307 
5308     /* allocate space for j and a arrrays of B_oth */
5309     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5310     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5311 
5312     /* j-array */
5313     /*---------*/
5314     /*  post receives of j-array */
5315     for (i=0; i<nrecvs; i++){
5316       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5317       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5318     }
5319 
5320     /* pack the outgoing message j-array */
5321     k = 0;
5322     for (i=0; i<nsends; i++){
5323       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5324       bufJ = bufj+sstartsj[i];
5325       for (j=0; j<nrows; j++) {
5326         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5327         for (ll=0; ll<sbs; ll++){
5328           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5329           for (l=0; l<ncols; l++){
5330             *bufJ++ = cols[l];
5331           }
5332           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5333         }
5334       }
5335       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5336     }
5337 
5338     /* recvs and sends of j-array are completed */
5339     i = nrecvs;
5340     while (i--) {
5341       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5342     }
5343     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5344   } else if (scall == MAT_REUSE_MATRIX){
5345     sstartsj = *startsj_s;
5346     rstartsj = *startsj_r;
5347     bufa     = *bufa_ptr;
5348     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5349     b_otha   = b_oth->a;
5350   } else {
5351     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5352   }
5353 
5354   /* a-array */
5355   /*---------*/
5356   /*  post receives of a-array */
5357   for (i=0; i<nrecvs; i++){
5358     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5359     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5360   }
5361 
5362   /* pack the outgoing message a-array */
5363   k = 0;
5364   for (i=0; i<nsends; i++){
5365     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5366     bufA = bufa+sstartsj[i];
5367     for (j=0; j<nrows; j++) {
5368       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5369       for (ll=0; ll<sbs; ll++){
5370         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5371         for (l=0; l<ncols; l++){
5372           *bufA++ = vals[l];
5373         }
5374         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5375       }
5376     }
5377     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5378   }
5379   /* recvs and sends of a-array are completed */
5380   i = nrecvs;
5381   while (i--) {
5382     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5383   }
5384   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5385   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5386 
5387   if (scall == MAT_INITIAL_MATRIX){
5388     /* put together the new matrix */
5389     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5390 
5391     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5392     /* Since these are PETSc arrays, change flags to free them as necessary. */
5393     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5394     b_oth->free_a  = PETSC_TRUE;
5395     b_oth->free_ij = PETSC_TRUE;
5396     b_oth->nonew   = 0;
5397 
5398     ierr = PetscFree(bufj);CHKERRQ(ierr);
5399     if (!startsj_s || !bufa_ptr){
5400       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5401       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5402     } else {
5403       *startsj_s = sstartsj;
5404       *startsj_r = rstartsj;
5405       *bufa_ptr  = bufa;
5406     }
5407   }
5408   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5409   PetscFunctionReturn(0);
5410 }
5411 
5412 #undef __FUNCT__
5413 #define __FUNCT__ "MatGetCommunicationStructs"
5414 /*@C
5415   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5416 
5417   Not Collective
5418 
5419   Input Parameters:
5420 . A - The matrix in mpiaij format
5421 
5422   Output Parameter:
5423 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5424 . colmap - A map from global column index to local index into lvec
5425 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5426 
5427   Level: developer
5428 
5429 @*/
5430 #if defined (PETSC_USE_CTABLE)
5431 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5432 #else
5433 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5434 #endif
5435 {
5436   Mat_MPIAIJ *a;
5437 
5438   PetscFunctionBegin;
5439   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5440   PetscValidPointer(lvec, 2);
5441   PetscValidPointer(colmap, 3);
5442   PetscValidPointer(multScatter, 4);
5443   a = (Mat_MPIAIJ *) A->data;
5444   if (lvec) *lvec = a->lvec;
5445   if (colmap) *colmap = a->colmap;
5446   if (multScatter) *multScatter = a->Mvctx;
5447   PetscFunctionReturn(0);
5448 }
5449 
5450 EXTERN_C_BEGIN
5451 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5452 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5453 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5454 EXTERN_C_END
5455 
5456 #undef __FUNCT__
5457 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5458 /*
5459     Computes (B'*A')' since computing B*A directly is untenable
5460 
5461                n                       p                          p
5462         (              )       (              )         (                  )
5463       m (      A       )  *  n (       B      )   =   m (         C        )
5464         (              )       (              )         (                  )
5465 
5466 */
5467 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5468 {
5469   PetscErrorCode     ierr;
5470   Mat                At,Bt,Ct;
5471 
5472   PetscFunctionBegin;
5473   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5474   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5475   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5476   ierr = MatDestroy(&At);CHKERRQ(ierr);
5477   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5478   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5479   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5480   PetscFunctionReturn(0);
5481 }
5482 
5483 #undef __FUNCT__
5484 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5485 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5486 {
5487   PetscErrorCode ierr;
5488   PetscInt       m=A->rmap->n,n=B->cmap->n;
5489   Mat            Cmat;
5490 
5491   PetscFunctionBegin;
5492   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5493   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5494   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5495   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5496   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5497   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5498   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5499   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5500   *C   = Cmat;
5501   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 /* ----------------------------------------------------------------*/
5506 #undef __FUNCT__
5507 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5508 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5509 {
5510   PetscErrorCode ierr;
5511 
5512   PetscFunctionBegin;
5513   if (scall == MAT_INITIAL_MATRIX){
5514     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5515   }
5516   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5517   PetscFunctionReturn(0);
5518 }
5519 
5520 EXTERN_C_BEGIN
5521 #if defined(PETSC_HAVE_MUMPS)
5522 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_PASTIX)
5525 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_SUPERLU_DIST)
5528 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5529 #endif
5530 #if defined(PETSC_HAVE_SPOOLES)
5531 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5532 #endif
5533 EXTERN_C_END
5534 
5535 /*MC
5536    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5537 
5538    Options Database Keys:
5539 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5540 
5541   Level: beginner
5542 
5543 .seealso: MatCreateAIJ()
5544 M*/
5545 
5546 EXTERN_C_BEGIN
5547 #undef __FUNCT__
5548 #define __FUNCT__ "MatCreate_MPIAIJ"
5549 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5550 {
5551   Mat_MPIAIJ     *b;
5552   PetscErrorCode ierr;
5553   PetscMPIInt    size;
5554 
5555   PetscFunctionBegin;
5556   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5557   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5558   B->data         = (void*)b;
5559   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5560   B->assembled    = PETSC_FALSE;
5561   B->insertmode   = NOT_SET_VALUES;
5562   b->size         = size;
5563   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5564 
5565   /* build cache for off array entries formed */
5566   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5567   b->donotstash  = PETSC_FALSE;
5568   b->colmap      = 0;
5569   b->garray      = 0;
5570   b->roworiented = PETSC_TRUE;
5571 
5572   /* stuff used for matrix vector multiply */
5573   b->lvec      = PETSC_NULL;
5574   b->Mvctx     = PETSC_NULL;
5575 
5576   /* stuff for MatGetRow() */
5577   b->rowindices   = 0;
5578   b->rowvalues    = 0;
5579   b->getrowactive = PETSC_FALSE;
5580 
5581   /* flexible pointer used in CUSP/CUSPARSE classes */
5582   b->spptr        = PETSC_NULL;
5583 #if defined(PETSC_HAVE_SPOOLES)
5584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5585                                      "MatGetFactor_mpiaij_spooles",
5586                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_MUMPS)
5589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5590                                      "MatGetFactor_aij_mumps",
5591                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5592 #endif
5593 #if defined(PETSC_HAVE_PASTIX)
5594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5595 					   "MatGetFactor_mpiaij_pastix",
5596 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5597 #endif
5598 #if defined(PETSC_HAVE_SUPERLU_DIST)
5599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5600                                      "MatGetFactor_mpiaij_superlu_dist",
5601                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5602 #endif
5603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5604                                      "MatStoreValues_MPIAIJ",
5605                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5607                                      "MatRetrieveValues_MPIAIJ",
5608                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5610 				     "MatGetDiagonalBlock_MPIAIJ",
5611                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5613 				     "MatIsTranspose_MPIAIJ",
5614 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5616 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5617 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5619 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5620 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5622 				     "MatDiagonalScaleLocal_MPIAIJ",
5623 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5625                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5626                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5628                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5629                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5631                                      "MatConvert_MPIAIJ_MPISBAIJ",
5632                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5633   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5634                                      "MatMatMult_MPIDense_MPIAIJ",
5635                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5636   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5637                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5638                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5640                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5641                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5643   PetscFunctionReturn(0);
5644 }
5645 EXTERN_C_END
5646 
5647 #undef __FUNCT__
5648 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5649 /*@
5650      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5651          and "off-diagonal" part of the matrix in CSR format.
5652 
5653    Collective on MPI_Comm
5654 
5655    Input Parameters:
5656 +  comm - MPI communicator
5657 .  m - number of local rows (Cannot be PETSC_DECIDE)
5658 .  n - This value should be the same as the local size used in creating the
5659        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5660        calculated if N is given) For square matrices n is almost always m.
5661 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5662 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5663 .   i - row indices for "diagonal" portion of matrix
5664 .   j - column indices
5665 .   a - matrix values
5666 .   oi - row indices for "off-diagonal" portion of matrix
5667 .   oj - column indices
5668 -   oa - matrix values
5669 
5670    Output Parameter:
5671 .   mat - the matrix
5672 
5673    Level: advanced
5674 
5675    Notes:
5676        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5677        must free the arrays once the matrix has been destroyed and not before.
5678 
5679        The i and j indices are 0 based
5680 
5681        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5682 
5683        This sets local rows and cannot be used to set off-processor values.
5684 
5685        You cannot later use MatSetValues() to change values in this matrix.
5686 
5687 .keywords: matrix, aij, compressed row, sparse, parallel
5688 
5689 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5690           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5691 @*/
5692 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5693 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5694 {
5695   PetscErrorCode ierr;
5696   Mat_MPIAIJ     *maij;
5697 
5698  PetscFunctionBegin;
5699   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5700   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5701   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5702   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5703   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5704   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5705   maij = (Mat_MPIAIJ*) (*mat)->data;
5706   maij->donotstash     = PETSC_TRUE;
5707   (*mat)->preallocated = PETSC_TRUE;
5708 
5709   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5710   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5711 
5712   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5713   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5714 
5715   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5716   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5717   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5718   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5719 
5720   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5721   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5722   PetscFunctionReturn(0);
5723 }
5724 
5725 /*
5726     Special version for direct calls from Fortran
5727 */
5728 #include <petsc-private/fortranimpl.h>
5729 
5730 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5731 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5732 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5733 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5734 #endif
5735 
5736 /* Change these macros so can be used in void function */
5737 #undef CHKERRQ
5738 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5739 #undef SETERRQ2
5740 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5741 #undef SETERRQ3
5742 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5743 #undef SETERRQ
5744 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5745 
5746 EXTERN_C_BEGIN
5747 #undef __FUNCT__
5748 #define __FUNCT__ "matsetvaluesmpiaij_"
5749 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5750 {
5751   Mat             mat = *mmat;
5752   PetscInt        m = *mm, n = *mn;
5753   InsertMode      addv = *maddv;
5754   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5755   PetscScalar     value;
5756   PetscErrorCode  ierr;
5757 
5758   MatCheckPreallocated(mat,1);
5759   if (mat->insertmode == NOT_SET_VALUES) {
5760     mat->insertmode = addv;
5761   }
5762 #if defined(PETSC_USE_DEBUG)
5763   else if (mat->insertmode != addv) {
5764     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5765   }
5766 #endif
5767   {
5768   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5769   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5770   PetscBool       roworiented = aij->roworiented;
5771 
5772   /* Some Variables required in the macro */
5773   Mat             A = aij->A;
5774   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5775   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5776   MatScalar       *aa = a->a;
5777   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5778   Mat             B = aij->B;
5779   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5780   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5781   MatScalar       *ba = b->a;
5782 
5783   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5784   PetscInt        nonew = a->nonew;
5785   MatScalar       *ap1,*ap2;
5786 
5787   PetscFunctionBegin;
5788   for (i=0; i<m; i++) {
5789     if (im[i] < 0) continue;
5790 #if defined(PETSC_USE_DEBUG)
5791     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5792 #endif
5793     if (im[i] >= rstart && im[i] < rend) {
5794       row      = im[i] - rstart;
5795       lastcol1 = -1;
5796       rp1      = aj + ai[row];
5797       ap1      = aa + ai[row];
5798       rmax1    = aimax[row];
5799       nrow1    = ailen[row];
5800       low1     = 0;
5801       high1    = nrow1;
5802       lastcol2 = -1;
5803       rp2      = bj + bi[row];
5804       ap2      = ba + bi[row];
5805       rmax2    = bimax[row];
5806       nrow2    = bilen[row];
5807       low2     = 0;
5808       high2    = nrow2;
5809 
5810       for (j=0; j<n; j++) {
5811         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5812         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5813         if (in[j] >= cstart && in[j] < cend){
5814           col = in[j] - cstart;
5815           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5816         } else if (in[j] < 0) continue;
5817 #if defined(PETSC_USE_DEBUG)
5818         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5819 #endif
5820         else {
5821           if (mat->was_assembled) {
5822             if (!aij->colmap) {
5823               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5824             }
5825 #if defined (PETSC_USE_CTABLE)
5826             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5827 	    col--;
5828 #else
5829             col = aij->colmap[in[j]] - 1;
5830 #endif
5831             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5832               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5833               col =  in[j];
5834               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5835               B = aij->B;
5836               b = (Mat_SeqAIJ*)B->data;
5837               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5838               rp2      = bj + bi[row];
5839               ap2      = ba + bi[row];
5840               rmax2    = bimax[row];
5841               nrow2    = bilen[row];
5842               low2     = 0;
5843               high2    = nrow2;
5844               bm       = aij->B->rmap->n;
5845               ba = b->a;
5846             }
5847           } else col = in[j];
5848           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5849         }
5850       }
5851     } else {
5852       if (!aij->donotstash) {
5853         if (roworiented) {
5854           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5855         } else {
5856           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5857         }
5858       }
5859     }
5860   }}
5861   PetscFunctionReturnVoid();
5862 }
5863 EXTERN_C_END
5864 
5865