xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8dc1d2a3a0b0219f67591c34b0f761797c23be35)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine number diagonal and off-diagonal counts */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine number diagonal and off-diagonal counts */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it it is not scalable (each processor
331 has an order N integer array but is fast to acess.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselfs, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal allocating
843      new memory, thus calling lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added in yy until the next line, */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195    PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302   FILE              *file;
1303 
1304   PetscFunctionBegin;
1305   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1307   nz   = A->nz + B->nz;
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) {
1319       rlen = PetscMax(rlen,range[i+1] - range[i]);
1320     }
1321   } else {
1322     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1323     rlen = mat->rmap->n;
1324   }
1325 
1326   /* load up the local row counts */
1327   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1328   for (i=0; i<mat->rmap->n; i++) {
1329     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1330   }
1331 
1332   /* store the row lengths to the file */
1333   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1334   if (!rank) {
1335     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1338       rlen = range[i+1] - range[i];
1339       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1345     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1349 
1350   /* load up the local column indices */
1351   nzmax = nz; /* )th processor needs space a largest processor needs */
1352   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1353   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1354   cnt  = 0;
1355   for (i=0; i<mat->rmap->n; i++) {
1356     for (j=B->i[i]; j<B->i[i+1]; j++) {
1357       if ( (col = garray[B->j[j]]) > cstart) break;
1358       column_indices[cnt++] = col;
1359     }
1360     for (k=A->i[i]; k<A->i[i+1]; k++) {
1361       column_indices[cnt++] = A->j[k] + cstart;
1362     }
1363     for (; j<B->i[i+1]; j++) {
1364       column_indices[cnt++] = garray[B->j[j]];
1365     }
1366   }
1367   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368 
1369   /* store the column indices to the file */
1370    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1371   if (!rank) {
1372     MPI_Status status;
1373     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     for (i=1; i<size; i++) {
1375       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1376       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1377       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1378       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1382   } else {
1383     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1384     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1386     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1387   }
1388   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1389 
1390   /* load up the local column values */
1391   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1392   cnt  = 0;
1393   for (i=0; i<mat->rmap->n; i++) {
1394     for (j=B->i[i]; j<B->i[i+1]; j++) {
1395       if ( garray[B->j[j]] > cstart) break;
1396       column_values[cnt++] = B->a[j];
1397     }
1398     for (k=A->i[i]; k<A->i[i+1]; k++) {
1399       column_values[cnt++] = A->a[k];
1400     }
1401     for (; j<B->i[i+1]; j++) {
1402       column_values[cnt++] = B->a[j];
1403     }
1404   }
1405   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406 
1407   /* store the column values to the file */
1408    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1409   if (!rank) {
1410     MPI_Status status;
1411     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     for (i=1; i<size; i++) {
1413        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1414       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1415       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1416       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1420   } else {
1421     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1422     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1425   }
1426   ierr = PetscFree(column_values);CHKERRQ(ierr);
1427 
1428   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1429   if (file) {
1430     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1439 {
1440   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1441   PetscErrorCode    ierr;
1442   PetscMPIInt       rank = aij->rank,size = aij->size;
1443   PetscBool         isdraw,iascii,isbinary;
1444   PetscViewer       sviewer;
1445   PetscViewerFormat format;
1446 
1447   PetscFunctionBegin;
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1451   if (iascii) {
1452     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1453     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1454       MatInfo    info;
1455       PetscBool  inodes;
1456 
1457       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1458       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1461       if (!inodes) {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1463 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1464       } else {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1466 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       }
1468       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1472       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1475       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1478       PetscInt   inodecount,inodelimit,*inodes;
1479       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1480       if (inodes) {
1481         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1482       } else {
1483         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1484       }
1485       PetscFunctionReturn(0);
1486     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1487       PetscFunctionReturn(0);
1488     }
1489   } else if (isbinary) {
1490     if (size == 1) {
1491       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1492       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1493     } else {
1494       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1495     }
1496     PetscFunctionReturn(0);
1497   } else if (isdraw) {
1498     PetscDraw  draw;
1499     PetscBool  isnull;
1500     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1501     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1502   }
1503 
1504   if (size == 1) {
1505     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507   } else {
1508     /* assemble the entire matrix onto first processor. */
1509     Mat         A;
1510     Mat_SeqAIJ  *Aloc;
1511     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1512     MatScalar   *a;
1513 
1514     if (mat->rmap->N > 1024) {
1515       PetscBool  flg = PETSC_FALSE;
1516 
1517       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1518       if (!flg) {
1519         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1520       }
1521     }
1522 
1523     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1524     if (!rank) {
1525       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1526     } else {
1527       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1528     }
1529     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1530     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1531     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1532     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1533     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1534 
1535     /* copy over the A part */
1536     Aloc = (Mat_SeqAIJ*)aij->A->data;
1537     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row = mat->rmap->rstart;
1539     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1543     }
1544     aj = Aloc->j;
1545     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1546 
1547     /* copy over the B part */
1548     Aloc = (Mat_SeqAIJ*)aij->B->data;
1549     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550     row  = mat->rmap->rstart;
1551     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552     ct   = cols;
1553     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1554     for (i=0; i<m; i++) {
1555       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1556       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1557     }
1558     ierr = PetscFree(ct);CHKERRQ(ierr);
1559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     /*
1562        Everyone has to call to draw the matrix since the graphics waits are
1563        synchronized across all processors that share the PetscDraw object
1564     */
1565     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     if (!rank) {
1567       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1568       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1569       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1570       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1571     }
1572     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     ierr = MatDestroy(&A);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatView_MPIAIJ"
1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1581 {
1582   PetscErrorCode ierr;
1583   PetscBool      iascii,isdraw,issocket,isbinary;
1584 
1585   PetscFunctionBegin;
1586   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1587   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1590   if (iascii || isdraw || isbinary || issocket) {
1591     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
1608   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1610   }
1611 
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   }  else if (flag & SOR_EISENSTAT) {
1667     Vec         xx1;
1668 
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   MPI_Comm       comm;
1703   PetscInt       first,local_rowsize,local_colsize;
1704   const PetscInt *rows;
1705   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1710   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1711   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1712   /* collect the global row permutation and invert it */
1713   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1714   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1715   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1718   /* get the local target indices */
1719   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1721   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1723   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1724   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1725   /* the column permutation is so much easier;
1726      make a local version of 'colp' and invert it */
1727   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1728   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1729   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1731   /* now we just get the submatrix */
1732   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1733   /* clean up */
1734   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1735   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscReal      isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size     = 1.0;
1750   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1755   isend[3] += info->memory;  isend[4] += info->mallocs;
1756   if (flag == MAT_LOCAL) {
1757     info->nz_used      = isend[0];
1758     info->nz_allocated = isend[1];
1759     info->nz_unneeded  = isend[2];
1760     info->memory       = isend[3];
1761     info->mallocs      = isend[4];
1762   } else if (flag == MAT_GLOBAL_MAX) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   } else if (flag == MAT_GLOBAL_SUM) {
1770     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   }
1777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1778   info->fill_ratio_needed = 0;
1779   info->factor_mallocs    = 0;
1780 
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetOption_MPIAIJ"
1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1787 {
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   switch (op) {
1793   case MAT_NEW_NONZERO_LOCATIONS:
1794   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1795   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1796   case MAT_KEEP_NONZERO_PATTERN:
1797   case MAT_NEW_NONZERO_LOCATION_ERR:
1798   case MAT_USE_INODES:
1799   case MAT_IGNORE_ZERO_ENTRIES:
1800     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1801     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1802     break;
1803   case MAT_ROW_ORIENTED:
1804     a->roworiented = flg;
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_NEW_DIAGONALS:
1809     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1810     break;
1811   case MAT_IGNORE_OFF_PROC_ENTRIES:
1812     a->donotstash = flg;
1813     break;
1814   case MAT_SPD:
1815     A->spd_set                         = PETSC_TRUE;
1816     A->spd                             = flg;
1817     if (flg) {
1818       A->symmetric                     = PETSC_TRUE;
1819       A->structurally_symmetric        = PETSC_TRUE;
1820       A->symmetric_set                 = PETSC_TRUE;
1821       A->structurally_symmetric_set    = PETSC_TRUE;
1822     }
1823     break;
1824   case MAT_SYMMETRIC:
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1829     break;
1830   case MAT_HERMITIAN:
1831     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1832     break;
1833   case MAT_SYMMETRY_ETERNAL:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836 #if defined(PETSC_HAVE_TXPETSCGPU)
1837     // Sorry, I don't know how else to add MatOptions for data specific
1838     // to GPU classes without protecting against it in this case statement.
1839     // If anyone knows of a better way, let me know. Paul
1840   case MAT_DIAGBLOCK_CSR:
1841   case MAT_OFFDIAGBLOCK_CSR:
1842   case MAT_CSR:
1843   case MAT_DIAGBLOCK_DIA:
1844   case MAT_OFFDIAGBLOCK_DIA:
1845   case MAT_DIA:
1846   case MAT_DIAGBLOCK_ELL:
1847   case MAT_OFFDIAGBLOCK_ELL:
1848   case MAT_ELL:
1849   case MAT_DIAGBLOCK_HYB:
1850   case MAT_OFFDIAGBLOCK_HYB:
1851   case MAT_HYB:
1852     /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */
1853     break;
1854 #endif
1855   default:
1856     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatGetRow_MPIAIJ"
1863 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1864 {
1865   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1866   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1867   PetscErrorCode ierr;
1868   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1869   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1870   PetscInt       *cmap,*idx_p;
1871 
1872   PetscFunctionBegin;
1873   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1874   mat->getrowactive = PETSC_TRUE;
1875 
1876   if (!mat->rowvalues && (idx || v)) {
1877     /*
1878         allocate enough space to hold information from the longest row.
1879     */
1880     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1881     PetscInt   max = 1,tmp;
1882     for (i=0; i<matin->rmap->n; i++) {
1883       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1884       if (max < tmp) { max = tmp; }
1885     }
1886     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1887   }
1888 
1889   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1890   lrow = row - rstart;
1891 
1892   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1893   if (!v)   {pvA = 0; pvB = 0;}
1894   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1895   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1896   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1897   nztot = nzA + nzB;
1898 
1899   cmap  = mat->garray;
1900   if (v  || idx) {
1901     if (nztot) {
1902       /* Sort by increasing column numbers, assuming A and B already sorted */
1903       PetscInt imark = -1;
1904       if (v) {
1905         *v = v_p = mat->rowvalues;
1906         for (i=0; i<nzB; i++) {
1907           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1908           else break;
1909         }
1910         imark = i;
1911         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1912         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1913       }
1914       if (idx) {
1915         *idx = idx_p = mat->rowindices;
1916         if (imark > -1) {
1917           for (i=0; i<imark; i++) {
1918             idx_p[i] = cmap[cworkB[i]];
1919           }
1920         } else {
1921           for (i=0; i<nzB; i++) {
1922             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1923             else break;
1924           }
1925           imark = i;
1926         }
1927         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1928         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1929       }
1930     } else {
1931       if (idx) *idx = 0;
1932       if (v)   *v   = 0;
1933     }
1934   }
1935   *nz = nztot;
1936   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1937   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1943 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1944 {
1945   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1946 
1947   PetscFunctionBegin;
1948   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1949   aij->getrowactive = PETSC_FALSE;
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatNorm_MPIAIJ"
1955 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1956 {
1957   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1958   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1959   PetscErrorCode ierr;
1960   PetscInt       i,j,cstart = mat->cmap->rstart;
1961   PetscReal      sum = 0.0;
1962   MatScalar      *v;
1963 
1964   PetscFunctionBegin;
1965   if (aij->size == 1) {
1966     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1967   } else {
1968     if (type == NORM_FROBENIUS) {
1969       v = amat->a;
1970       for (i=0; i<amat->nz; i++) {
1971 #if defined(PETSC_USE_COMPLEX)
1972         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1973 #else
1974         sum += (*v)*(*v); v++;
1975 #endif
1976       }
1977       v = bmat->a;
1978       for (i=0; i<bmat->nz; i++) {
1979 #if defined(PETSC_USE_COMPLEX)
1980         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1981 #else
1982         sum += (*v)*(*v); v++;
1983 #endif
1984       }
1985       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1986       *norm = PetscSqrtReal(*norm);
1987     } else if (type == NORM_1) { /* max column norm */
1988       PetscReal *tmp,*tmp2;
1989       PetscInt  *jj,*garray = aij->garray;
1990       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1991       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1992       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1993       *norm = 0.0;
1994       v = amat->a; jj = amat->j;
1995       for (j=0; j<amat->nz; j++) {
1996         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1997       }
1998       v = bmat->a; jj = bmat->j;
1999       for (j=0; j<bmat->nz; j++) {
2000         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2001       }
2002       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2003       for (j=0; j<mat->cmap->N; j++) {
2004         if (tmp2[j] > *norm) *norm = tmp2[j];
2005       }
2006       ierr = PetscFree(tmp);CHKERRQ(ierr);
2007       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2008     } else if (type == NORM_INFINITY) { /* max row norm */
2009       PetscReal ntemp = 0.0;
2010       for (j=0; j<aij->A->rmap->n; j++) {
2011         v = amat->a + amat->i[j];
2012         sum = 0.0;
2013         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2014           sum += PetscAbsScalar(*v); v++;
2015         }
2016         v = bmat->a + bmat->i[j];
2017         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2018           sum += PetscAbsScalar(*v); v++;
2019         }
2020         if (sum > ntemp) ntemp = sum;
2021       }
2022       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2023     } else {
2024       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2025     }
2026   }
2027   PetscFunctionReturn(0);
2028 }
2029 
2030 #undef __FUNCT__
2031 #define __FUNCT__ "MatTranspose_MPIAIJ"
2032 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2033 {
2034   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2035   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2036   PetscErrorCode ierr;
2037   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2038   PetscInt       cstart=A->cmap->rstart,ncol;
2039   Mat            B;
2040   MatScalar      *array;
2041 
2042   PetscFunctionBegin;
2043   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2044 
2045   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2046   ai = Aloc->i; aj = Aloc->j;
2047   bi = Bloc->i; bj = Bloc->j;
2048   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2049     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2050     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2051     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2052     for (i=0; i<ai[ma]; i++){
2053       d_nnz[aj[i]] ++;
2054       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2055     }
2056 
2057     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2058     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2059     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2060     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2061     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2062     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2063     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2064   } else {
2065     B = *matout;
2066   }
2067 
2068   /* copy over the A part */
2069   array = Aloc->a;
2070   row = A->rmap->rstart;
2071   for (i=0; i<ma; i++) {
2072     ncol = ai[i+1]-ai[i];
2073     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2074     row++; array += ncol; aj += ncol;
2075   }
2076   aj = Aloc->j;
2077   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2078 
2079   /* copy over the B part */
2080   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2081   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2082   array = Bloc->a;
2083   row = A->rmap->rstart;
2084   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2085   cols_tmp = cols;
2086   for (i=0; i<mb; i++) {
2087     ncol = bi[i+1]-bi[i];
2088     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2089     row++; array += ncol; cols_tmp += ncol;
2090   }
2091   ierr = PetscFree(cols);CHKERRQ(ierr);
2092 
2093   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2094   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2096     *matout = B;
2097   } else {
2098     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2099   }
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2105 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2106 {
2107   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2108   Mat            a = aij->A,b = aij->B;
2109   PetscErrorCode ierr;
2110   PetscInt       s1,s2,s3;
2111 
2112   PetscFunctionBegin;
2113   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2114   if (rr) {
2115     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2116     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2117     /* Overlap communication with computation. */
2118     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2119   }
2120   if (ll) {
2121     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2122     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2123     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2124   }
2125   /* scale  the diagonal block */
2126   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2127 
2128   if (rr) {
2129     /* Do a scatter end and then right scale the off-diagonal block */
2130     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2131     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2132   }
2133 
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2139 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2140 {
2141   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatEqual_MPIAIJ"
2151 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2152 {
2153   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2154   Mat            a,b,c,d;
2155   PetscBool      flg;
2156   PetscErrorCode ierr;
2157 
2158   PetscFunctionBegin;
2159   a = matA->A; b = matA->B;
2160   c = matB->A; d = matB->B;
2161 
2162   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2163   if (flg) {
2164     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2165   }
2166   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatCopy_MPIAIJ"
2172 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2173 {
2174   PetscErrorCode ierr;
2175   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2176   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2177 
2178   PetscFunctionBegin;
2179   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2180   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2181     /* because of the column compression in the off-processor part of the matrix a->B,
2182        the number of columns in a->B and b->B may be different, hence we cannot call
2183        the MatCopy() directly on the two parts. If need be, we can provide a more
2184        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2185        then copying the submatrices */
2186     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2187   } else {
2188     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2189     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2190   }
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatSetUp_MPIAIJ"
2196 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2197 {
2198   PetscErrorCode ierr;
2199 
2200   PetscFunctionBegin;
2201   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2207 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2208 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2209 {
2210   PetscInt          i,m=Y->rmap->N;
2211   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2212   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2213   const PetscInt    *xi = x->i,*yi = y->i;
2214 
2215   PetscFunctionBegin;
2216   /* Set the number of nonzeros in the new matrix */
2217   for(i=0; i<m; i++) {
2218     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2219     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2220     nnz[i] = 0;
2221     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2222       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2223       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2224       nnz[i]++;
2225     }
2226     for (; k<nzy; k++) nnz[i]++;
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatAXPY_MPIAIJ"
2233 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2234 {
2235   PetscErrorCode ierr;
2236   PetscInt       i;
2237   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2238   PetscBLASInt   bnz,one=1;
2239   Mat_SeqAIJ     *x,*y;
2240 
2241   PetscFunctionBegin;
2242   if (str == SAME_NONZERO_PATTERN) {
2243     PetscScalar alpha = a;
2244     x = (Mat_SeqAIJ *)xx->A->data;
2245     y = (Mat_SeqAIJ *)yy->A->data;
2246     bnz = PetscBLASIntCast(x->nz);
2247     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2248     x = (Mat_SeqAIJ *)xx->B->data;
2249     y = (Mat_SeqAIJ *)yy->B->data;
2250     bnz = PetscBLASIntCast(x->nz);
2251     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2252   } else if (str == SUBSET_NONZERO_PATTERN) {
2253     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2254 
2255     x = (Mat_SeqAIJ *)xx->B->data;
2256     y = (Mat_SeqAIJ *)yy->B->data;
2257     if (y->xtoy && y->XtoY != xx->B) {
2258       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2259       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2260     }
2261     if (!y->xtoy) { /* get xtoy */
2262       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2263       y->XtoY = xx->B;
2264       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2265     }
2266     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2267   } else {
2268     Mat B;
2269     PetscInt *nnz_d,*nnz_o;
2270     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2271     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2272     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2273     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2274     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2275     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2276     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2277     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2278     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2279     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2280     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2281     ierr = MatHeaderReplace(Y,B);
2282     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2283     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2284   }
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatConjugate_MPIAIJ"
2292 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2293 {
2294 #if defined(PETSC_USE_COMPLEX)
2295   PetscErrorCode ierr;
2296   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2300   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2301 #else
2302   PetscFunctionBegin;
2303 #endif
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatRealPart_MPIAIJ"
2309 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2310 {
2311   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2316   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2322 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2323 {
2324   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2325   PetscErrorCode ierr;
2326 
2327   PetscFunctionBegin;
2328   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2329   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 #ifdef PETSC_HAVE_PBGL
2334 
2335 #include <boost/parallel/mpi/bsp_process_group.hpp>
2336 #include <boost/graph/distributed/ilu_default_graph.hpp>
2337 #include <boost/graph/distributed/ilu_0_block.hpp>
2338 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2339 #include <boost/graph/distributed/petsc/interface.hpp>
2340 #include <boost/multi_array.hpp>
2341 #include <boost/parallel/distributed_property_map->hpp>
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2345 /*
2346   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2347 */
2348 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2349 {
2350   namespace petsc = boost::distributed::petsc;
2351 
2352   namespace graph_dist = boost::graph::distributed;
2353   using boost::graph::distributed::ilu_default::process_group_type;
2354   using boost::graph::ilu_permuted;
2355 
2356   PetscBool       row_identity, col_identity;
2357   PetscContainer  c;
2358   PetscInt        m, n, M, N;
2359   PetscErrorCode  ierr;
2360 
2361   PetscFunctionBegin;
2362   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2363   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2364   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2365   if (!row_identity || !col_identity) {
2366     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2367   }
2368 
2369   process_group_type pg;
2370   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2371   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2372   lgraph_type&   level_graph = *lgraph_p;
2373   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2374 
2375   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2376   ilu_permuted(level_graph);
2377 
2378   /* put together the new matrix */
2379   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2380   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2381   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2382   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2383   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2384   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2385   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2386   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387 
2388   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2389   ierr = PetscContainerSetPointer(c, lgraph_p);
2390   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2391   ierr = PetscContainerDestroy(&c);
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2397 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2398 {
2399   PetscFunctionBegin;
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatSolve_MPIAIJ"
2405 /*
2406   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2407 */
2408 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2409 {
2410   namespace graph_dist = boost::graph::distributed;
2411 
2412   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2413   lgraph_type*   lgraph_p;
2414   PetscContainer c;
2415   PetscErrorCode ierr;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2419   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2420   ierr = VecCopy(b, x);CHKERRQ(ierr);
2421 
2422   PetscScalar* array_x;
2423   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2424   PetscInt sx;
2425   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2426 
2427   PetscScalar* array_b;
2428   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2429   PetscInt sb;
2430   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2431 
2432   lgraph_type&   level_graph = *lgraph_p;
2433   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2434 
2435   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2436   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2437                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2438 
2439   typedef boost::iterator_property_map<array_ref_type::iterator,
2440                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2441   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2442                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2443 
2444   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2445 
2446   PetscFunctionReturn(0);
2447 }
2448 #endif
2449 
2450 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2451   PetscInt       nzlocal,nsends,nrecvs;
2452   PetscMPIInt    *send_rank,*recv_rank;
2453   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2454   PetscScalar    *sbuf_a,**rbuf_a;
2455   PetscErrorCode (*Destroy)(Mat);
2456 } Mat_Redundant;
2457 
2458 #undef __FUNCT__
2459 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2460 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2461 {
2462   PetscErrorCode       ierr;
2463   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2464   PetscInt             i;
2465 
2466   PetscFunctionBegin;
2467   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2468   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2469   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2470   for (i=0; i<redund->nrecvs; i++){
2471     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2472     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2473   }
2474   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2475   ierr = PetscFree(redund);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 #undef __FUNCT__
2480 #define __FUNCT__ "MatDestroy_MatRedundant"
2481 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2482 {
2483   PetscErrorCode  ierr;
2484   PetscContainer  container;
2485   Mat_Redundant   *redund=PETSC_NULL;
2486 
2487   PetscFunctionBegin;
2488   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2489   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2490   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2491   A->ops->destroy = redund->Destroy;
2492   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2493   if (A->ops->destroy) {
2494     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2495   }
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2501 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2502 {
2503   PetscMPIInt    rank,size;
2504   MPI_Comm       comm=((PetscObject)mat)->comm;
2505   PetscErrorCode ierr;
2506   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2507   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2508   PetscInt       *rowrange=mat->rmap->range;
2509   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2510   Mat            A=aij->A,B=aij->B,C=*matredundant;
2511   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2512   PetscScalar    *sbuf_a;
2513   PetscInt       nzlocal=a->nz+b->nz;
2514   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2515   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2516   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2517   MatScalar      *aworkA,*aworkB;
2518   PetscScalar    *vals;
2519   PetscMPIInt    tag1,tag2,tag3,imdex;
2520   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2521                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2522   MPI_Status     recv_status,*send_status;
2523   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2524   PetscInt       **rbuf_j=PETSC_NULL;
2525   PetscScalar    **rbuf_a=PETSC_NULL;
2526   Mat_Redundant  *redund=PETSC_NULL;
2527   PetscContainer container;
2528 
2529   PetscFunctionBegin;
2530   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2531   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2532 
2533   if (reuse == MAT_REUSE_MATRIX) {
2534     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2535     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2536     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2537     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2538     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2539     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2540     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2541     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2542 
2543     nsends    = redund->nsends;
2544     nrecvs    = redund->nrecvs;
2545     send_rank = redund->send_rank;
2546     recv_rank = redund->recv_rank;
2547     sbuf_nz   = redund->sbuf_nz;
2548     rbuf_nz   = redund->rbuf_nz;
2549     sbuf_j    = redund->sbuf_j;
2550     sbuf_a    = redund->sbuf_a;
2551     rbuf_j    = redund->rbuf_j;
2552     rbuf_a    = redund->rbuf_a;
2553   }
2554 
2555   if (reuse == MAT_INITIAL_MATRIX){
2556     PetscMPIInt  subrank,subsize;
2557     PetscInt     nleftover,np_subcomm;
2558     /* get the destination processors' id send_rank, nsends and nrecvs */
2559     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2560     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2561     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2562     np_subcomm = size/nsubcomm;
2563     nleftover  = size - nsubcomm*np_subcomm;
2564     nsends = 0; nrecvs = 0;
2565     for (i=0; i<size; i++){ /* i=rank*/
2566       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2567         send_rank[nsends] = i; nsends++;
2568         recv_rank[nrecvs++] = i;
2569       }
2570     }
2571     if (rank >= size - nleftover){/* this proc is a leftover processor */
2572       i = size-nleftover-1;
2573       j = 0;
2574       while (j < nsubcomm - nleftover){
2575         send_rank[nsends++] = i;
2576         i--; j++;
2577       }
2578     }
2579 
2580     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2581       for (i=0; i<nleftover; i++){
2582         recv_rank[nrecvs++] = size-nleftover+i;
2583       }
2584     }
2585 
2586     /* allocate sbuf_j, sbuf_a */
2587     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2588     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2589     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2590   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2591 
2592   /* copy mat's local entries into the buffers */
2593   if (reuse == MAT_INITIAL_MATRIX){
2594     rownz_max = 0;
2595     rptr = sbuf_j;
2596     cols = sbuf_j + rend-rstart + 1;
2597     vals = sbuf_a;
2598     rptr[0] = 0;
2599     for (i=0; i<rend-rstart; i++){
2600       row = i + rstart;
2601       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2602       ncols  = nzA + nzB;
2603       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2604       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2605       /* load the column indices for this row into cols */
2606       lwrite = 0;
2607       for (l=0; l<nzB; l++) {
2608         if ((ctmp = bmap[cworkB[l]]) < cstart){
2609           vals[lwrite]   = aworkB[l];
2610           cols[lwrite++] = ctmp;
2611         }
2612       }
2613       for (l=0; l<nzA; l++){
2614         vals[lwrite]   = aworkA[l];
2615         cols[lwrite++] = cstart + cworkA[l];
2616       }
2617       for (l=0; l<nzB; l++) {
2618         if ((ctmp = bmap[cworkB[l]]) >= cend){
2619           vals[lwrite]   = aworkB[l];
2620           cols[lwrite++] = ctmp;
2621         }
2622       }
2623       vals += ncols;
2624       cols += ncols;
2625       rptr[i+1] = rptr[i] + ncols;
2626       if (rownz_max < ncols) rownz_max = ncols;
2627     }
2628     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2629   } else { /* only copy matrix values into sbuf_a */
2630     rptr = sbuf_j;
2631     vals = sbuf_a;
2632     rptr[0] = 0;
2633     for (i=0; i<rend-rstart; i++){
2634       row = i + rstart;
2635       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2636       ncols  = nzA + nzB;
2637       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2638       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2639       lwrite = 0;
2640       for (l=0; l<nzB; l++) {
2641         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2642       }
2643       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2644       for (l=0; l<nzB; l++) {
2645         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2646       }
2647       vals += ncols;
2648       rptr[i+1] = rptr[i] + ncols;
2649     }
2650   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2651 
2652   /* send nzlocal to others, and recv other's nzlocal */
2653   /*--------------------------------------------------*/
2654   if (reuse == MAT_INITIAL_MATRIX){
2655     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2656     s_waits2 = s_waits3 + nsends;
2657     s_waits1 = s_waits2 + nsends;
2658     r_waits1 = s_waits1 + nsends;
2659     r_waits2 = r_waits1 + nrecvs;
2660     r_waits3 = r_waits2 + nrecvs;
2661   } else {
2662     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2663     r_waits3 = s_waits3 + nsends;
2664   }
2665 
2666   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2667   if (reuse == MAT_INITIAL_MATRIX){
2668     /* get new tags to keep the communication clean */
2669     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2670     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2671     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2672 
2673     /* post receives of other's nzlocal */
2674     for (i=0; i<nrecvs; i++){
2675       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2676     }
2677     /* send nzlocal to others */
2678     for (i=0; i<nsends; i++){
2679       sbuf_nz[i] = nzlocal;
2680       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2681     }
2682     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2683     count = nrecvs;
2684     while (count) {
2685       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2686       recv_rank[imdex] = recv_status.MPI_SOURCE;
2687       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2688       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2689 
2690       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2691       rbuf_nz[imdex] += i + 2;
2692       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2693       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2694       count--;
2695     }
2696     /* wait on sends of nzlocal */
2697     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2698     /* send mat->i,j to others, and recv from other's */
2699     /*------------------------------------------------*/
2700     for (i=0; i<nsends; i++){
2701       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2702       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2703     }
2704     /* wait on receives of mat->i,j */
2705     /*------------------------------*/
2706     count = nrecvs;
2707     while (count) {
2708       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2709       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2710       count--;
2711     }
2712     /* wait on sends of mat->i,j */
2713     /*---------------------------*/
2714     if (nsends) {
2715       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2716     }
2717   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2718 
2719   /* post receives, send and receive mat->a */
2720   /*----------------------------------------*/
2721   for (imdex=0; imdex<nrecvs; imdex++) {
2722     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2723   }
2724   for (i=0; i<nsends; i++){
2725     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2726   }
2727   count = nrecvs;
2728   while (count) {
2729     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2730     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2731     count--;
2732   }
2733   if (nsends) {
2734     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2735   }
2736 
2737   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2738 
2739   /* create redundant matrix */
2740   /*-------------------------*/
2741   if (reuse == MAT_INITIAL_MATRIX){
2742     /* compute rownz_max for preallocation */
2743     for (imdex=0; imdex<nrecvs; imdex++){
2744       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2745       rptr = rbuf_j[imdex];
2746       for (i=0; i<j; i++){
2747         ncols = rptr[i+1] - rptr[i];
2748         if (rownz_max < ncols) rownz_max = ncols;
2749       }
2750     }
2751 
2752     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2753     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2754     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2755     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2756     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2757     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2758   } else {
2759     C = *matredundant;
2760   }
2761 
2762   /* insert local matrix entries */
2763   rptr = sbuf_j;
2764   cols = sbuf_j + rend-rstart + 1;
2765   vals = sbuf_a;
2766   for (i=0; i<rend-rstart; i++){
2767     row   = i + rstart;
2768     ncols = rptr[i+1] - rptr[i];
2769     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2770     vals += ncols;
2771     cols += ncols;
2772   }
2773   /* insert received matrix entries */
2774   for (imdex=0; imdex<nrecvs; imdex++){
2775     rstart = rowrange[recv_rank[imdex]];
2776     rend   = rowrange[recv_rank[imdex]+1];
2777     rptr = rbuf_j[imdex];
2778     cols = rbuf_j[imdex] + rend-rstart + 1;
2779     vals = rbuf_a[imdex];
2780     for (i=0; i<rend-rstart; i++){
2781       row   = i + rstart;
2782       ncols = rptr[i+1] - rptr[i];
2783       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2784       vals += ncols;
2785       cols += ncols;
2786     }
2787   }
2788   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2789   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2791   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2792   if (reuse == MAT_INITIAL_MATRIX) {
2793     PetscContainer container;
2794     *matredundant = C;
2795     /* create a supporting struct and attach it to C for reuse */
2796     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2797     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2798     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2799     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2800     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2801     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2802 
2803     redund->nzlocal = nzlocal;
2804     redund->nsends  = nsends;
2805     redund->nrecvs  = nrecvs;
2806     redund->send_rank = send_rank;
2807     redund->recv_rank = recv_rank;
2808     redund->sbuf_nz = sbuf_nz;
2809     redund->rbuf_nz = rbuf_nz;
2810     redund->sbuf_j  = sbuf_j;
2811     redund->sbuf_a  = sbuf_a;
2812     redund->rbuf_j  = rbuf_j;
2813     redund->rbuf_a  = rbuf_a;
2814 
2815     redund->Destroy = C->ops->destroy;
2816     C->ops->destroy = MatDestroy_MatRedundant;
2817   }
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 #undef __FUNCT__
2822 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2823 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2824 {
2825   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2826   PetscErrorCode ierr;
2827   PetscInt       i,*idxb = 0;
2828   PetscScalar    *va,*vb;
2829   Vec            vtmp;
2830 
2831   PetscFunctionBegin;
2832   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2833   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2834   if (idx) {
2835     for (i=0; i<A->rmap->n; i++) {
2836       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2837     }
2838   }
2839 
2840   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2841   if (idx) {
2842     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2843   }
2844   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2845   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2846 
2847   for (i=0; i<A->rmap->n; i++){
2848     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2849       va[i] = vb[i];
2850       if (idx) idx[i] = a->garray[idxb[i]];
2851     }
2852   }
2853 
2854   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2855   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2856   ierr = PetscFree(idxb);CHKERRQ(ierr);
2857   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2863 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2864 {
2865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2866   PetscErrorCode ierr;
2867   PetscInt       i,*idxb = 0;
2868   PetscScalar    *va,*vb;
2869   Vec            vtmp;
2870 
2871   PetscFunctionBegin;
2872   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2873   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2874   if (idx) {
2875     for (i=0; i<A->cmap->n; i++) {
2876       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2877     }
2878   }
2879 
2880   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2881   if (idx) {
2882     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2883   }
2884   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2885   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2886 
2887   for (i=0; i<A->rmap->n; i++){
2888     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2889       va[i] = vb[i];
2890       if (idx) idx[i] = a->garray[idxb[i]];
2891     }
2892   }
2893 
2894   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2895   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2896   ierr = PetscFree(idxb);CHKERRQ(ierr);
2897   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 #undef __FUNCT__
2902 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2903 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2904 {
2905   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2906   PetscInt       n      = A->rmap->n;
2907   PetscInt       cstart = A->cmap->rstart;
2908   PetscInt      *cmap   = mat->garray;
2909   PetscInt      *diagIdx, *offdiagIdx;
2910   Vec            diagV, offdiagV;
2911   PetscScalar   *a, *diagA, *offdiagA;
2912   PetscInt       r;
2913   PetscErrorCode ierr;
2914 
2915   PetscFunctionBegin;
2916   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2917   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2918   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2919   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2920   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2921   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2922   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2923   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2924   for(r = 0; r < n; ++r) {
2925     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2926       a[r]   = diagA[r];
2927       idx[r] = cstart + diagIdx[r];
2928     } else {
2929       a[r]   = offdiagA[r];
2930       idx[r] = cmap[offdiagIdx[r]];
2931     }
2932   }
2933   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2934   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2935   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2936   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2937   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2938   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 #undef __FUNCT__
2943 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2944 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2945 {
2946   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2947   PetscInt       n      = A->rmap->n;
2948   PetscInt       cstart = A->cmap->rstart;
2949   PetscInt      *cmap   = mat->garray;
2950   PetscInt      *diagIdx, *offdiagIdx;
2951   Vec            diagV, offdiagV;
2952   PetscScalar   *a, *diagA, *offdiagA;
2953   PetscInt       r;
2954   PetscErrorCode ierr;
2955 
2956   PetscFunctionBegin;
2957   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2958   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2959   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2960   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2961   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2962   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2963   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2964   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2965   for(r = 0; r < n; ++r) {
2966     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2967       a[r]   = diagA[r];
2968       idx[r] = cstart + diagIdx[r];
2969     } else {
2970       a[r]   = offdiagA[r];
2971       idx[r] = cmap[offdiagIdx[r]];
2972     }
2973   }
2974   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2975   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2976   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2977   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2978   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2979   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 #undef __FUNCT__
2984 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2985 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2986 {
2987   PetscErrorCode ierr;
2988   Mat            *dummy;
2989 
2990   PetscFunctionBegin;
2991   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2992   *newmat = *dummy;
2993   ierr = PetscFree(dummy);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3001 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3002 {
3003   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3004   PetscErrorCode ierr;
3005 
3006   PetscFunctionBegin;
3007   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 
3012 /* -------------------------------------------------------------------*/
3013 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3014        MatGetRow_MPIAIJ,
3015        MatRestoreRow_MPIAIJ,
3016        MatMult_MPIAIJ,
3017 /* 4*/ MatMultAdd_MPIAIJ,
3018        MatMultTranspose_MPIAIJ,
3019        MatMultTransposeAdd_MPIAIJ,
3020 #ifdef PETSC_HAVE_PBGL
3021        MatSolve_MPIAIJ,
3022 #else
3023        0,
3024 #endif
3025        0,
3026        0,
3027 /*10*/ 0,
3028        0,
3029        0,
3030        MatSOR_MPIAIJ,
3031        MatTranspose_MPIAIJ,
3032 /*15*/ MatGetInfo_MPIAIJ,
3033        MatEqual_MPIAIJ,
3034        MatGetDiagonal_MPIAIJ,
3035        MatDiagonalScale_MPIAIJ,
3036        MatNorm_MPIAIJ,
3037 /*20*/ MatAssemblyBegin_MPIAIJ,
3038        MatAssemblyEnd_MPIAIJ,
3039        MatSetOption_MPIAIJ,
3040        MatZeroEntries_MPIAIJ,
3041 /*24*/ MatZeroRows_MPIAIJ,
3042        0,
3043 #ifdef PETSC_HAVE_PBGL
3044        0,
3045 #else
3046        0,
3047 #endif
3048        0,
3049        0,
3050 /*29*/ MatSetUp_MPIAIJ,
3051 #ifdef PETSC_HAVE_PBGL
3052        0,
3053 #else
3054        0,
3055 #endif
3056        0,
3057        0,
3058        0,
3059 /*34*/ MatDuplicate_MPIAIJ,
3060        0,
3061        0,
3062        0,
3063        0,
3064 /*39*/ MatAXPY_MPIAIJ,
3065        MatGetSubMatrices_MPIAIJ,
3066        MatIncreaseOverlap_MPIAIJ,
3067        MatGetValues_MPIAIJ,
3068        MatCopy_MPIAIJ,
3069 /*44*/ MatGetRowMax_MPIAIJ,
3070        MatScale_MPIAIJ,
3071        0,
3072        0,
3073        MatZeroRowsColumns_MPIAIJ,
3074 /*49*/ 0,
3075        0,
3076        0,
3077        0,
3078        0,
3079 /*54*/ MatFDColoringCreate_MPIAIJ,
3080        0,
3081        MatSetUnfactored_MPIAIJ,
3082        0, /* MatPermute_MPIAIJ, impl currently broken */
3083        0,
3084 /*59*/ MatGetSubMatrix_MPIAIJ,
3085        MatDestroy_MPIAIJ,
3086        MatView_MPIAIJ,
3087        0,
3088        0,
3089 /*64*/ 0,
3090        0,
3091        0,
3092        0,
3093        0,
3094 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3095        MatGetRowMinAbs_MPIAIJ,
3096        0,
3097        MatSetColoring_MPIAIJ,
3098 #if defined(PETSC_HAVE_ADIC)
3099        MatSetValuesAdic_MPIAIJ,
3100 #else
3101        0,
3102 #endif
3103        MatSetValuesAdifor_MPIAIJ,
3104 /*75*/ MatFDColoringApply_AIJ,
3105        0,
3106        0,
3107        0,
3108        0,
3109 /*80*/ 0,
3110        0,
3111        0,
3112 /*83*/ MatLoad_MPIAIJ,
3113        0,
3114        0,
3115        0,
3116        0,
3117        0,
3118 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3119        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3120        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3121        MatPtAP_Basic,
3122        MatPtAPSymbolic_MPIAIJ,
3123 /*94*/ MatPtAPNumeric_MPIAIJ,
3124        0,
3125        0,
3126        0,
3127        0,
3128 /*99*/ 0,
3129        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3130        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3131        MatConjugate_MPIAIJ,
3132        0,
3133 /*104*/MatSetValuesRow_MPIAIJ,
3134        MatRealPart_MPIAIJ,
3135        MatImaginaryPart_MPIAIJ,
3136        0,
3137        0,
3138 /*109*/0,
3139        MatGetRedundantMatrix_MPIAIJ,
3140        MatGetRowMin_MPIAIJ,
3141        0,
3142        0,
3143 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3144        0,
3145        0,
3146        0,
3147        0,
3148 /*119*/0,
3149        0,
3150        0,
3151        0,
3152        MatGetMultiProcBlock_MPIAIJ,
3153 /*124*/MatFindNonZeroRows_MPIAIJ,
3154        MatGetColumnNorms_MPIAIJ,
3155        MatInvertBlockDiagonal_MPIAIJ,
3156        0,
3157        MatGetSubMatricesParallel_MPIAIJ,
3158 /*129*/0,
3159        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3160        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3161        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3162        0,
3163 /*134*/0,
3164        0,
3165        0,
3166        0,
3167        0
3168 };
3169 
3170 /* ----------------------------------------------------------------------------------------*/
3171 
3172 EXTERN_C_BEGIN
3173 #undef __FUNCT__
3174 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3175 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3176 {
3177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3182   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3183   PetscFunctionReturn(0);
3184 }
3185 EXTERN_C_END
3186 
3187 EXTERN_C_BEGIN
3188 #undef __FUNCT__
3189 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3190 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3191 {
3192   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3193   PetscErrorCode ierr;
3194 
3195   PetscFunctionBegin;
3196   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3197   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3198   PetscFunctionReturn(0);
3199 }
3200 EXTERN_C_END
3201 
3202 EXTERN_C_BEGIN
3203 #undef __FUNCT__
3204 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3205 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3206 {
3207   Mat_MPIAIJ     *b;
3208   PetscErrorCode ierr;
3209   PetscInt       i;
3210   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3211 
3212   PetscFunctionBegin;
3213   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3214   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3215   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3216   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3217   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3218   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3219 
3220   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3221   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3222   if (d_nnz) {
3223     for (i=0; i<B->rmap->n; i++) {
3224       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3225     }
3226   }
3227   if (o_nnz) {
3228     for (i=0; i<B->rmap->n; i++) {
3229       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3230     }
3231   }
3232   b = (Mat_MPIAIJ*)B->data;
3233 
3234   if (!B->preallocated) {
3235     /* Explicitly create 2 MATSEQAIJ matrices. */
3236     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3237     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3238     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3239     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3240     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3241     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3242     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3243     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3244     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3245     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3246   }
3247 
3248   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3249   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3250   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3251   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3252   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3253   B->preallocated = PETSC_TRUE;
3254   PetscFunctionReturn(0);
3255 }
3256 EXTERN_C_END
3257 
3258 #undef __FUNCT__
3259 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3260 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3261 {
3262   Mat            mat;
3263   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3264   PetscErrorCode ierr;
3265 
3266   PetscFunctionBegin;
3267   *newmat       = 0;
3268   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3269   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3270   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3271   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3272   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3273   a    = (Mat_MPIAIJ*)mat->data;
3274 
3275   mat->factortype    = matin->factortype;
3276   mat->rmap->bs      = matin->rmap->bs;
3277   mat->cmap->bs      = matin->cmap->bs;
3278   mat->assembled    = PETSC_TRUE;
3279   mat->insertmode   = NOT_SET_VALUES;
3280   mat->preallocated = PETSC_TRUE;
3281 
3282   a->size           = oldmat->size;
3283   a->rank           = oldmat->rank;
3284   a->donotstash     = oldmat->donotstash;
3285   a->roworiented    = oldmat->roworiented;
3286   a->rowindices     = 0;
3287   a->rowvalues      = 0;
3288   a->getrowactive   = PETSC_FALSE;
3289 
3290   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3291   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3292 
3293   if (oldmat->colmap) {
3294 #if defined (PETSC_USE_CTABLE)
3295     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3296 #else
3297     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3298     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3299     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3300 #endif
3301   } else a->colmap = 0;
3302   if (oldmat->garray) {
3303     PetscInt len;
3304     len  = oldmat->B->cmap->n;
3305     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3306     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3307     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3308   } else a->garray = 0;
3309 
3310   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3311   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3312   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3313   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3314   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3315   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3316   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3317   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3318   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3319   *newmat = mat;
3320   PetscFunctionReturn(0);
3321 }
3322 
3323 
3324 
3325 #undef __FUNCT__
3326 #define __FUNCT__ "MatLoad_MPIAIJ"
3327 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3328 {
3329   PetscScalar    *vals,*svals;
3330   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3331   PetscErrorCode ierr;
3332   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3333   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3334   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3335   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3336   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3337   int            fd;
3338   PetscInt       bs = 1;
3339 
3340   PetscFunctionBegin;
3341   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3342   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3343   if (!rank) {
3344     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3345     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3346     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3347   }
3348 
3349   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3350   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3351   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3352 
3353   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3354 
3355   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3356   M = header[1]; N = header[2];
3357   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3358   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3359   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3360 
3361   /* If global sizes are set, check if they are consistent with that given in the file */
3362   if (sizesset) {
3363     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3364   }
3365   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3366   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3367 
3368   /* determine ownership of all (block) rows */
3369   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3370   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3371   else m = newMat->rmap->n; /* Set by user */
3372 
3373   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3374   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3375 
3376   /* First process needs enough room for process with most rows */
3377   if (!rank) {
3378     mmax       = rowners[1];
3379     for (i=2; i<size; i++) {
3380       mmax = PetscMax(mmax,rowners[i]);
3381     }
3382   } else mmax = m;
3383 
3384   rowners[0] = 0;
3385   for (i=2; i<=size; i++) {
3386     rowners[i] += rowners[i-1];
3387   }
3388   rstart = rowners[rank];
3389   rend   = rowners[rank+1];
3390 
3391   /* distribute row lengths to all processors */
3392   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3393   if (!rank) {
3394     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3395     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3396     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3397     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3398     for (j=0; j<m; j++) {
3399       procsnz[0] += ourlens[j];
3400     }
3401     for (i=1; i<size; i++) {
3402       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3403       /* calculate the number of nonzeros on each processor */
3404       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3405         procsnz[i] += rowlengths[j];
3406       }
3407       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3408     }
3409     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3410   } else {
3411     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3412   }
3413 
3414   if (!rank) {
3415     /* determine max buffer needed and allocate it */
3416     maxnz = 0;
3417     for (i=0; i<size; i++) {
3418       maxnz = PetscMax(maxnz,procsnz[i]);
3419     }
3420     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3421 
3422     /* read in my part of the matrix column indices  */
3423     nz   = procsnz[0];
3424     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3425     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3426 
3427     /* read in every one elses and ship off */
3428     for (i=1; i<size; i++) {
3429       nz     = procsnz[i];
3430       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3431       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3432     }
3433     ierr = PetscFree(cols);CHKERRQ(ierr);
3434   } else {
3435     /* determine buffer space needed for message */
3436     nz = 0;
3437     for (i=0; i<m; i++) {
3438       nz += ourlens[i];
3439     }
3440     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3441 
3442     /* receive message of column indices*/
3443     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3444   }
3445 
3446   /* determine column ownership if matrix is not square */
3447   if (N != M) {
3448     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3449     else n = newMat->cmap->n;
3450     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3451     cstart = cend - n;
3452   } else {
3453     cstart = rstart;
3454     cend   = rend;
3455     n      = cend - cstart;
3456   }
3457 
3458   /* loop over local rows, determining number of off diagonal entries */
3459   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3460   jj = 0;
3461   for (i=0; i<m; i++) {
3462     for (j=0; j<ourlens[i]; j++) {
3463       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3464       jj++;
3465     }
3466   }
3467 
3468   for (i=0; i<m; i++) {
3469     ourlens[i] -= offlens[i];
3470   }
3471   if (!sizesset) {
3472     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3473   }
3474 
3475   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3476 
3477   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3478 
3479   for (i=0; i<m; i++) {
3480     ourlens[i] += offlens[i];
3481   }
3482 
3483   if (!rank) {
3484     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3485 
3486     /* read in my part of the matrix numerical values  */
3487     nz   = procsnz[0];
3488     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3489 
3490     /* insert into matrix */
3491     jj      = rstart;
3492     smycols = mycols;
3493     svals   = vals;
3494     for (i=0; i<m; i++) {
3495       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3496       smycols += ourlens[i];
3497       svals   += ourlens[i];
3498       jj++;
3499     }
3500 
3501     /* read in other processors and ship out */
3502     for (i=1; i<size; i++) {
3503       nz     = procsnz[i];
3504       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3505       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3506     }
3507     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3508   } else {
3509     /* receive numeric values */
3510     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3511 
3512     /* receive message of values*/
3513     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3514 
3515     /* insert into matrix */
3516     jj      = rstart;
3517     smycols = mycols;
3518     svals   = vals;
3519     for (i=0; i<m; i++) {
3520       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3521       smycols += ourlens[i];
3522       svals   += ourlens[i];
3523       jj++;
3524     }
3525   }
3526   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3527   ierr = PetscFree(vals);CHKERRQ(ierr);
3528   ierr = PetscFree(mycols);CHKERRQ(ierr);
3529   ierr = PetscFree(rowners);CHKERRQ(ierr);
3530   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3531   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3532 
3533   PetscFunctionReturn(0);
3534 }
3535 
3536 #undef __FUNCT__
3537 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3538 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3539 {
3540   PetscErrorCode ierr;
3541   IS             iscol_local;
3542   PetscInt       csize;
3543 
3544   PetscFunctionBegin;
3545   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3546   if (call == MAT_REUSE_MATRIX) {
3547     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3548     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3549   } else {
3550     PetscInt cbs;
3551     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3552     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3553     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3554   }
3555   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3556   if (call == MAT_INITIAL_MATRIX) {
3557     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3558     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3559   }
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3566 /*
3567     Not great since it makes two copies of the submatrix, first an SeqAIJ
3568   in local and then by concatenating the local matrices the end result.
3569   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3570 
3571   Note: This requires a sequential iscol with all indices.
3572 */
3573 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3574 {
3575   PetscErrorCode ierr;
3576   PetscMPIInt    rank,size;
3577   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3578   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3579   PetscBool      allcolumns, colflag;
3580   Mat            M,Mreuse;
3581   MatScalar      *vwork,*aa;
3582   MPI_Comm       comm = ((PetscObject)mat)->comm;
3583   Mat_SeqAIJ     *aij;
3584 
3585 
3586   PetscFunctionBegin;
3587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3589 
3590   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3591   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3592   if (colflag && ncol == mat->cmap->N){
3593     allcolumns = PETSC_TRUE;
3594   } else {
3595     allcolumns = PETSC_FALSE;
3596   }
3597   if (call ==  MAT_REUSE_MATRIX) {
3598     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3599     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3600     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3601   } else {
3602     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3603   }
3604 
3605   /*
3606       m - number of local rows
3607       n - number of columns (same on all processors)
3608       rstart - first row in new global matrix generated
3609   */
3610   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3611   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3612   if (call == MAT_INITIAL_MATRIX) {
3613     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3614     ii  = aij->i;
3615     jj  = aij->j;
3616 
3617     /*
3618         Determine the number of non-zeros in the diagonal and off-diagonal
3619         portions of the matrix in order to do correct preallocation
3620     */
3621 
3622     /* first get start and end of "diagonal" columns */
3623     if (csize == PETSC_DECIDE) {
3624       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3625       if (mglobal == n) { /* square matrix */
3626 	nlocal = m;
3627       } else {
3628         nlocal = n/size + ((n % size) > rank);
3629       }
3630     } else {
3631       nlocal = csize;
3632     }
3633     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3634     rstart = rend - nlocal;
3635     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3636 
3637     /* next, compute all the lengths */
3638     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3639     olens = dlens + m;
3640     for (i=0; i<m; i++) {
3641       jend = ii[i+1] - ii[i];
3642       olen = 0;
3643       dlen = 0;
3644       for (j=0; j<jend; j++) {
3645         if (*jj < rstart || *jj >= rend) olen++;
3646         else dlen++;
3647         jj++;
3648       }
3649       olens[i] = olen;
3650       dlens[i] = dlen;
3651     }
3652     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3653     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3654     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3655     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3656     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3657     ierr = PetscFree(dlens);CHKERRQ(ierr);
3658   } else {
3659     PetscInt ml,nl;
3660 
3661     M = *newmat;
3662     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3663     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3664     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3665     /*
3666          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3667        rather than the slower MatSetValues().
3668     */
3669     M->was_assembled = PETSC_TRUE;
3670     M->assembled     = PETSC_FALSE;
3671   }
3672   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3673   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3674   ii  = aij->i;
3675   jj  = aij->j;
3676   aa  = aij->a;
3677   for (i=0; i<m; i++) {
3678     row   = rstart + i;
3679     nz    = ii[i+1] - ii[i];
3680     cwork = jj;     jj += nz;
3681     vwork = aa;     aa += nz;
3682     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3683   }
3684 
3685   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3686   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   *newmat = M;
3688 
3689   /* save submatrix used in processor for next request */
3690   if (call ==  MAT_INITIAL_MATRIX) {
3691     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3692     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3693   }
3694 
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 EXTERN_C_BEGIN
3699 #undef __FUNCT__
3700 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3701 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3702 {
3703   PetscInt       m,cstart, cend,j,nnz,i,d;
3704   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3705   const PetscInt *JJ;
3706   PetscScalar    *values;
3707   PetscErrorCode ierr;
3708 
3709   PetscFunctionBegin;
3710   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3711 
3712   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3713   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3714   m      = B->rmap->n;
3715   cstart = B->cmap->rstart;
3716   cend   = B->cmap->rend;
3717   rstart = B->rmap->rstart;
3718 
3719   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3720 
3721 #if defined(PETSC_USE_DEBUGGING)
3722   for (i=0; i<m; i++) {
3723     nnz     = Ii[i+1]- Ii[i];
3724     JJ      = J + Ii[i];
3725     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3726     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3727     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3728   }
3729 #endif
3730 
3731   for (i=0; i<m; i++) {
3732     nnz     = Ii[i+1]- Ii[i];
3733     JJ      = J + Ii[i];
3734     nnz_max = PetscMax(nnz_max,nnz);
3735     d       = 0;
3736     for (j=0; j<nnz; j++) {
3737       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3738     }
3739     d_nnz[i] = d;
3740     o_nnz[i] = nnz - d;
3741   }
3742   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3743   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3744 
3745   if (v) values = (PetscScalar*)v;
3746   else {
3747     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3748     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3749   }
3750 
3751   for (i=0; i<m; i++) {
3752     ii   = i + rstart;
3753     nnz  = Ii[i+1]- Ii[i];
3754     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3755   }
3756   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3757   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3758 
3759   if (!v) {
3760     ierr = PetscFree(values);CHKERRQ(ierr);
3761   }
3762   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3763   PetscFunctionReturn(0);
3764 }
3765 EXTERN_C_END
3766 
3767 #undef __FUNCT__
3768 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3769 /*@
3770    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3771    (the default parallel PETSc format).
3772 
3773    Collective on MPI_Comm
3774 
3775    Input Parameters:
3776 +  B - the matrix
3777 .  i - the indices into j for the start of each local row (starts with zero)
3778 .  j - the column indices for each local row (starts with zero)
3779 -  v - optional values in the matrix
3780 
3781    Level: developer
3782 
3783    Notes:
3784        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3785      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3786      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3787 
3788        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3789 
3790        The format which is used for the sparse matrix input, is equivalent to a
3791     row-major ordering.. i.e for the following matrix, the input data expected is
3792     as shown:
3793 
3794         1 0 0
3795         2 0 3     P0
3796        -------
3797         4 5 6     P1
3798 
3799      Process0 [P0]: rows_owned=[0,1]
3800         i =  {0,1,3}  [size = nrow+1  = 2+1]
3801         j =  {0,0,2}  [size = nz = 6]
3802         v =  {1,2,3}  [size = nz = 6]
3803 
3804      Process1 [P1]: rows_owned=[2]
3805         i =  {0,3}    [size = nrow+1  = 1+1]
3806         j =  {0,1,2}  [size = nz = 6]
3807         v =  {4,5,6}  [size = nz = 6]
3808 
3809 .keywords: matrix, aij, compressed row, sparse, parallel
3810 
3811 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3812           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3813 @*/
3814 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3815 {
3816   PetscErrorCode ierr;
3817 
3818   PetscFunctionBegin;
3819   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3820   PetscFunctionReturn(0);
3821 }
3822 
3823 #undef __FUNCT__
3824 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3825 /*@C
3826    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3827    (the default parallel PETSc format).  For good matrix assembly performance
3828    the user should preallocate the matrix storage by setting the parameters
3829    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3830    performance can be increased by more than a factor of 50.
3831 
3832    Collective on MPI_Comm
3833 
3834    Input Parameters:
3835 +  A - the matrix
3836 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3837            (same value is used for all local rows)
3838 .  d_nnz - array containing the number of nonzeros in the various rows of the
3839            DIAGONAL portion of the local submatrix (possibly different for each row)
3840            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3841            The size of this array is equal to the number of local rows, i.e 'm'.
3842            For matrices that will be factored, you must leave room for (and set)
3843            the diagonal entry even if it is zero.
3844 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3845            submatrix (same value is used for all local rows).
3846 -  o_nnz - array containing the number of nonzeros in the various rows of the
3847            OFF-DIAGONAL portion of the local submatrix (possibly different for
3848            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3849            structure. The size of this array is equal to the number
3850            of local rows, i.e 'm'.
3851 
3852    If the *_nnz parameter is given then the *_nz parameter is ignored
3853 
3854    The AIJ format (also called the Yale sparse matrix format or
3855    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3856    storage.  The stored row and column indices begin with zero.
3857    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3858 
3859    The parallel matrix is partitioned such that the first m0 rows belong to
3860    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3861    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3862 
3863    The DIAGONAL portion of the local submatrix of a processor can be defined
3864    as the submatrix which is obtained by extraction the part corresponding to
3865    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3866    first row that belongs to the processor, r2 is the last row belonging to
3867    the this processor, and c1-c2 is range of indices of the local part of a
3868    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3869    common case of a square matrix, the row and column ranges are the same and
3870    the DIAGONAL part is also square. The remaining portion of the local
3871    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3872 
3873    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3874 
3875    You can call MatGetInfo() to get information on how effective the preallocation was;
3876    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3877    You can also run with the option -info and look for messages with the string
3878    malloc in them to see if additional memory allocation was needed.
3879 
3880    Example usage:
3881 
3882    Consider the following 8x8 matrix with 34 non-zero values, that is
3883    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3884    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3885    as follows:
3886 
3887 .vb
3888             1  2  0  |  0  3  0  |  0  4
3889     Proc0   0  5  6  |  7  0  0  |  8  0
3890             9  0 10  | 11  0  0  | 12  0
3891     -------------------------------------
3892            13  0 14  | 15 16 17  |  0  0
3893     Proc1   0 18  0  | 19 20 21  |  0  0
3894             0  0  0  | 22 23  0  | 24  0
3895     -------------------------------------
3896     Proc2  25 26 27  |  0  0 28  | 29  0
3897            30  0  0  | 31 32 33  |  0 34
3898 .ve
3899 
3900    This can be represented as a collection of submatrices as:
3901 
3902 .vb
3903       A B C
3904       D E F
3905       G H I
3906 .ve
3907 
3908    Where the submatrices A,B,C are owned by proc0, D,E,F are
3909    owned by proc1, G,H,I are owned by proc2.
3910 
3911    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3912    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3913    The 'M','N' parameters are 8,8, and have the same values on all procs.
3914 
3915    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3916    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3917    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3918    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3919    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3920    matrix, ans [DF] as another SeqAIJ matrix.
3921 
3922    When d_nz, o_nz parameters are specified, d_nz storage elements are
3923    allocated for every row of the local diagonal submatrix, and o_nz
3924    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3925    One way to choose d_nz and o_nz is to use the max nonzerors per local
3926    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3927    In this case, the values of d_nz,o_nz are:
3928 .vb
3929      proc0 : dnz = 2, o_nz = 2
3930      proc1 : dnz = 3, o_nz = 2
3931      proc2 : dnz = 1, o_nz = 4
3932 .ve
3933    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3934    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3935    for proc3. i.e we are using 12+15+10=37 storage locations to store
3936    34 values.
3937 
3938    When d_nnz, o_nnz parameters are specified, the storage is specified
3939    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3940    In the above case the values for d_nnz,o_nnz are:
3941 .vb
3942      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3943      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3944      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3945 .ve
3946    Here the space allocated is sum of all the above values i.e 34, and
3947    hence pre-allocation is perfect.
3948 
3949    Level: intermediate
3950 
3951 .keywords: matrix, aij, compressed row, sparse, parallel
3952 
3953 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3954           MPIAIJ, MatGetInfo()
3955 @*/
3956 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3957 {
3958   PetscErrorCode ierr;
3959 
3960   PetscFunctionBegin;
3961   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3962   PetscValidType(B,1);
3963   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3964   PetscFunctionReturn(0);
3965 }
3966 
3967 #undef __FUNCT__
3968 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3969 /*@
3970      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3971          CSR format the local rows.
3972 
3973    Collective on MPI_Comm
3974 
3975    Input Parameters:
3976 +  comm - MPI communicator
3977 .  m - number of local rows (Cannot be PETSC_DECIDE)
3978 .  n - This value should be the same as the local size used in creating the
3979        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3980        calculated if N is given) For square matrices n is almost always m.
3981 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3982 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3983 .   i - row indices
3984 .   j - column indices
3985 -   a - matrix values
3986 
3987    Output Parameter:
3988 .   mat - the matrix
3989 
3990    Level: intermediate
3991 
3992    Notes:
3993        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3994      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3995      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3996 
3997        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3998 
3999        The format which is used for the sparse matrix input, is equivalent to a
4000     row-major ordering.. i.e for the following matrix, the input data expected is
4001     as shown:
4002 
4003         1 0 0
4004         2 0 3     P0
4005        -------
4006         4 5 6     P1
4007 
4008      Process0 [P0]: rows_owned=[0,1]
4009         i =  {0,1,3}  [size = nrow+1  = 2+1]
4010         j =  {0,0,2}  [size = nz = 6]
4011         v =  {1,2,3}  [size = nz = 6]
4012 
4013      Process1 [P1]: rows_owned=[2]
4014         i =  {0,3}    [size = nrow+1  = 1+1]
4015         j =  {0,1,2}  [size = nz = 6]
4016         v =  {4,5,6}  [size = nz = 6]
4017 
4018 .keywords: matrix, aij, compressed row, sparse, parallel
4019 
4020 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4021           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4022 @*/
4023 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4024 {
4025   PetscErrorCode ierr;
4026 
4027   PetscFunctionBegin;
4028   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4029   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4030   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4031   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4032   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4033   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4034   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4035   PetscFunctionReturn(0);
4036 }
4037 
4038 #undef __FUNCT__
4039 #define __FUNCT__ "MatCreateAIJ"
4040 /*@C
4041    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4042    (the default parallel PETSc format).  For good matrix assembly performance
4043    the user should preallocate the matrix storage by setting the parameters
4044    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4045    performance can be increased by more than a factor of 50.
4046 
4047    Collective on MPI_Comm
4048 
4049    Input Parameters:
4050 +  comm - MPI communicator
4051 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4052            This value should be the same as the local size used in creating the
4053            y vector for the matrix-vector product y = Ax.
4054 .  n - This value should be the same as the local size used in creating the
4055        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4056        calculated if N is given) For square matrices n is almost always m.
4057 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4058 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4059 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4060            (same value is used for all local rows)
4061 .  d_nnz - array containing the number of nonzeros in the various rows of the
4062            DIAGONAL portion of the local submatrix (possibly different for each row)
4063            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4064            The size of this array is equal to the number of local rows, i.e 'm'.
4065 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4066            submatrix (same value is used for all local rows).
4067 -  o_nnz - array containing the number of nonzeros in the various rows of the
4068            OFF-DIAGONAL portion of the local submatrix (possibly different for
4069            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4070            structure. The size of this array is equal to the number
4071            of local rows, i.e 'm'.
4072 
4073    Output Parameter:
4074 .  A - the matrix
4075 
4076    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4077    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4078    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4079 
4080    Notes:
4081    If the *_nnz parameter is given then the *_nz parameter is ignored
4082 
4083    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4084    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4085    storage requirements for this matrix.
4086 
4087    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4088    processor than it must be used on all processors that share the object for
4089    that argument.
4090 
4091    The user MUST specify either the local or global matrix dimensions
4092    (possibly both).
4093 
4094    The parallel matrix is partitioned across processors such that the
4095    first m0 rows belong to process 0, the next m1 rows belong to
4096    process 1, the next m2 rows belong to process 2 etc.. where
4097    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4098    values corresponding to [m x N] submatrix.
4099 
4100    The columns are logically partitioned with the n0 columns belonging
4101    to 0th partition, the next n1 columns belonging to the next
4102    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4103 
4104    The DIAGONAL portion of the local submatrix on any given processor
4105    is the submatrix corresponding to the rows and columns m,n
4106    corresponding to the given processor. i.e diagonal matrix on
4107    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4108    etc. The remaining portion of the local submatrix [m x (N-n)]
4109    constitute the OFF-DIAGONAL portion. The example below better
4110    illustrates this concept.
4111 
4112    For a square global matrix we define each processor's diagonal portion
4113    to be its local rows and the corresponding columns (a square submatrix);
4114    each processor's off-diagonal portion encompasses the remainder of the
4115    local matrix (a rectangular submatrix).
4116 
4117    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4118 
4119    When calling this routine with a single process communicator, a matrix of
4120    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4121    type of communicator, use the construction mechanism:
4122      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4123 
4124    By default, this format uses inodes (identical nodes) when possible.
4125    We search for consecutive rows with the same nonzero structure, thereby
4126    reusing matrix information to achieve increased efficiency.
4127 
4128    Options Database Keys:
4129 +  -mat_no_inode  - Do not use inodes
4130 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4131 -  -mat_aij_oneindex - Internally use indexing starting at 1
4132         rather than 0.  Note that when calling MatSetValues(),
4133         the user still MUST index entries starting at 0!
4134 
4135 
4136    Example usage:
4137 
4138    Consider the following 8x8 matrix with 34 non-zero values, that is
4139    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4140    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4141    as follows:
4142 
4143 .vb
4144             1  2  0  |  0  3  0  |  0  4
4145     Proc0   0  5  6  |  7  0  0  |  8  0
4146             9  0 10  | 11  0  0  | 12  0
4147     -------------------------------------
4148            13  0 14  | 15 16 17  |  0  0
4149     Proc1   0 18  0  | 19 20 21  |  0  0
4150             0  0  0  | 22 23  0  | 24  0
4151     -------------------------------------
4152     Proc2  25 26 27  |  0  0 28  | 29  0
4153            30  0  0  | 31 32 33  |  0 34
4154 .ve
4155 
4156    This can be represented as a collection of submatrices as:
4157 
4158 .vb
4159       A B C
4160       D E F
4161       G H I
4162 .ve
4163 
4164    Where the submatrices A,B,C are owned by proc0, D,E,F are
4165    owned by proc1, G,H,I are owned by proc2.
4166 
4167    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4168    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4169    The 'M','N' parameters are 8,8, and have the same values on all procs.
4170 
4171    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4172    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4173    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4174    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4175    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4176    matrix, ans [DF] as another SeqAIJ matrix.
4177 
4178    When d_nz, o_nz parameters are specified, d_nz storage elements are
4179    allocated for every row of the local diagonal submatrix, and o_nz
4180    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4181    One way to choose d_nz and o_nz is to use the max nonzerors per local
4182    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4183    In this case, the values of d_nz,o_nz are:
4184 .vb
4185      proc0 : dnz = 2, o_nz = 2
4186      proc1 : dnz = 3, o_nz = 2
4187      proc2 : dnz = 1, o_nz = 4
4188 .ve
4189    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4190    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4191    for proc3. i.e we are using 12+15+10=37 storage locations to store
4192    34 values.
4193 
4194    When d_nnz, o_nnz parameters are specified, the storage is specified
4195    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4196    In the above case the values for d_nnz,o_nnz are:
4197 .vb
4198      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4199      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4200      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4201 .ve
4202    Here the space allocated is sum of all the above values i.e 34, and
4203    hence pre-allocation is perfect.
4204 
4205    Level: intermediate
4206 
4207 .keywords: matrix, aij, compressed row, sparse, parallel
4208 
4209 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4210           MPIAIJ, MatCreateMPIAIJWithArrays()
4211 @*/
4212 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4213 {
4214   PetscErrorCode ierr;
4215   PetscMPIInt    size;
4216 
4217   PetscFunctionBegin;
4218   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4219   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4220   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4221   if (size > 1) {
4222     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4223     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4224   } else {
4225     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4226     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4227   }
4228   PetscFunctionReturn(0);
4229 }
4230 
4231 #undef __FUNCT__
4232 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4233 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4234 {
4235   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4236 
4237   PetscFunctionBegin;
4238   *Ad     = a->A;
4239   *Ao     = a->B;
4240   *colmap = a->garray;
4241   PetscFunctionReturn(0);
4242 }
4243 
4244 #undef __FUNCT__
4245 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4246 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4247 {
4248   PetscErrorCode ierr;
4249   PetscInt       i;
4250   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4251 
4252   PetscFunctionBegin;
4253   if (coloring->ctype == IS_COLORING_GLOBAL) {
4254     ISColoringValue *allcolors,*colors;
4255     ISColoring      ocoloring;
4256 
4257     /* set coloring for diagonal portion */
4258     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4259 
4260     /* set coloring for off-diagonal portion */
4261     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4262     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4263     for (i=0; i<a->B->cmap->n; i++) {
4264       colors[i] = allcolors[a->garray[i]];
4265     }
4266     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4267     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4268     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4269     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4270   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4271     ISColoringValue *colors;
4272     PetscInt        *larray;
4273     ISColoring      ocoloring;
4274 
4275     /* set coloring for diagonal portion */
4276     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4277     for (i=0; i<a->A->cmap->n; i++) {
4278       larray[i] = i + A->cmap->rstart;
4279     }
4280     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4281     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4282     for (i=0; i<a->A->cmap->n; i++) {
4283       colors[i] = coloring->colors[larray[i]];
4284     }
4285     ierr = PetscFree(larray);CHKERRQ(ierr);
4286     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4287     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4288     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4289 
4290     /* set coloring for off-diagonal portion */
4291     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4292     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4293     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4294     for (i=0; i<a->B->cmap->n; i++) {
4295       colors[i] = coloring->colors[larray[i]];
4296     }
4297     ierr = PetscFree(larray);CHKERRQ(ierr);
4298     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4299     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4300     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4301   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4302 
4303   PetscFunctionReturn(0);
4304 }
4305 
4306 #if defined(PETSC_HAVE_ADIC)
4307 #undef __FUNCT__
4308 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4309 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4310 {
4311   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4312   PetscErrorCode ierr;
4313 
4314   PetscFunctionBegin;
4315   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4316   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4317   PetscFunctionReturn(0);
4318 }
4319 #endif
4320 
4321 #undef __FUNCT__
4322 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4323 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4324 {
4325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4326   PetscErrorCode ierr;
4327 
4328   PetscFunctionBegin;
4329   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4330   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4331   PetscFunctionReturn(0);
4332 }
4333 
4334 #undef __FUNCT__
4335 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4336 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4337 {
4338   PetscErrorCode ierr;
4339   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4340   PetscInt       *indx;
4341 
4342   PetscFunctionBegin;
4343   /* This routine will ONLY return MPIAIJ type matrix */
4344   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4345   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4346   if (n == PETSC_DECIDE){
4347     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4348   }
4349   /* Check sum(n) = N */
4350   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4351   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4352 
4353   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4354   rstart -= m;
4355 
4356   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4357   for (i=0;i<m;i++) {
4358     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4359     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4360     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4361   }
4362 
4363   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4364   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4365   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4366   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4367   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4368   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 #undef __FUNCT__
4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4374 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4375 {
4376   PetscErrorCode ierr;
4377   PetscInt       m,N,i,rstart,nnz,Ii;
4378   PetscInt       *indx;
4379   PetscScalar    *values;
4380 
4381   PetscFunctionBegin;
4382   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4383   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4384   for (i=0;i<m;i++) {
4385     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4386     Ii    = i + rstart;
4387     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4388     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4389   }
4390   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4391   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4392   PetscFunctionReturn(0);
4393 }
4394 
4395 #undef __FUNCT__
4396 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4397 /*@
4398       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4399                  matrices from each processor
4400 
4401     Collective on MPI_Comm
4402 
4403    Input Parameters:
4404 +    comm - the communicators the parallel matrix will live on
4405 .    inmat - the input sequential matrices
4406 .    n - number of local columns (or PETSC_DECIDE)
4407 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4408 
4409    Output Parameter:
4410 .    outmat - the parallel matrix generated
4411 
4412     Level: advanced
4413 
4414    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4415 
4416 @*/
4417 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4418 {
4419   PetscErrorCode ierr;
4420 
4421   PetscFunctionBegin;
4422   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4423   if (scall == MAT_INITIAL_MATRIX){
4424     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4425   }
4426   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4427   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatFileSplit"
4433 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4434 {
4435   PetscErrorCode    ierr;
4436   PetscMPIInt       rank;
4437   PetscInt          m,N,i,rstart,nnz;
4438   size_t            len;
4439   const PetscInt    *indx;
4440   PetscViewer       out;
4441   char              *name;
4442   Mat               B;
4443   const PetscScalar *values;
4444 
4445   PetscFunctionBegin;
4446   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4447   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4448   /* Should this be the type of the diagonal block of A? */
4449   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4450   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4451   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4452   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4453   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4454   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4455   for (i=0;i<m;i++) {
4456     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4457     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4458     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4459   }
4460   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4461   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4462 
4463   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4464   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4465   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4466   sprintf(name,"%s.%d",outfile,rank);
4467   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4468   ierr = PetscFree(name);
4469   ierr = MatView(B,out);CHKERRQ(ierr);
4470   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4471   ierr = MatDestroy(&B);CHKERRQ(ierr);
4472   PetscFunctionReturn(0);
4473 }
4474 
4475 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4476 #undef __FUNCT__
4477 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4478 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4479 {
4480   PetscErrorCode       ierr;
4481   Mat_Merge_SeqsToMPI  *merge;
4482   PetscContainer       container;
4483 
4484   PetscFunctionBegin;
4485   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4486   if (container) {
4487     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4500     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4501     ierr = PetscFree(merge);CHKERRQ(ierr);
4502     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4503   }
4504   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4505   PetscFunctionReturn(0);
4506 }
4507 
4508 #include <../src/mat/utils/freespace.h>
4509 #include <petscbt.h>
4510 
4511 #undef __FUNCT__
4512 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4513 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4514 {
4515   PetscErrorCode       ierr;
4516   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4517   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4518   PetscMPIInt          size,rank,taga,*len_s;
4519   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4520   PetscInt             proc,m;
4521   PetscInt             **buf_ri,**buf_rj;
4522   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4523   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4524   MPI_Request          *s_waits,*r_waits;
4525   MPI_Status           *status;
4526   MatScalar            *aa=a->a;
4527   MatScalar            **abuf_r,*ba_i;
4528   Mat_Merge_SeqsToMPI  *merge;
4529   PetscContainer       container;
4530 
4531   PetscFunctionBegin;
4532   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4533 
4534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4535   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4536 
4537   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4538   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4539 
4540   bi     = merge->bi;
4541   bj     = merge->bj;
4542   buf_ri = merge->buf_ri;
4543   buf_rj = merge->buf_rj;
4544 
4545   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4546   owners = merge->rowmap->range;
4547   len_s  = merge->len_s;
4548 
4549   /* send and recv matrix values */
4550   /*-----------------------------*/
4551   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4552   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4553 
4554   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4555   for (proc=0,k=0; proc<size; proc++){
4556     if (!len_s[proc]) continue;
4557     i = owners[proc];
4558     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4559     k++;
4560   }
4561 
4562   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4563   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4564   ierr = PetscFree(status);CHKERRQ(ierr);
4565 
4566   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4567   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4568 
4569   /* insert mat values of mpimat */
4570   /*----------------------------*/
4571   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4572   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4573 
4574   for (k=0; k<merge->nrecv; k++){
4575     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4576     nrows = *(buf_ri_k[k]);
4577     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4578     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4579   }
4580 
4581   /* set values of ba */
4582   m = merge->rowmap->n;
4583   for (i=0; i<m; i++) {
4584     arow = owners[rank] + i;
4585     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4586     bnzi = bi[i+1] - bi[i];
4587     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4588 
4589     /* add local non-zero vals of this proc's seqmat into ba */
4590     anzi = ai[arow+1] - ai[arow];
4591     aj   = a->j + ai[arow];
4592     aa   = a->a + ai[arow];
4593     nextaj = 0;
4594     for (j=0; nextaj<anzi; j++){
4595       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4596         ba_i[j] += aa[nextaj++];
4597       }
4598     }
4599 
4600     /* add received vals into ba */
4601     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4602       /* i-th row */
4603       if (i == *nextrow[k]) {
4604         anzi = *(nextai[k]+1) - *nextai[k];
4605         aj   = buf_rj[k] + *(nextai[k]);
4606         aa   = abuf_r[k] + *(nextai[k]);
4607         nextaj = 0;
4608         for (j=0; nextaj<anzi; j++){
4609           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4610             ba_i[j] += aa[nextaj++];
4611           }
4612         }
4613         nextrow[k]++; nextai[k]++;
4614       }
4615     }
4616     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4617   }
4618   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4619   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4620 
4621   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4622   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4623   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4624   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4625   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4626   PetscFunctionReturn(0);
4627 }
4628 
4629 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4630 
4631 #undef __FUNCT__
4632 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4633 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4634 {
4635   PetscErrorCode       ierr;
4636   Mat                  B_mpi;
4637   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4638   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4639   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4640   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4641   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4642   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4643   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4644   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4645   MPI_Status           *status;
4646   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4647   PetscBT              lnkbt;
4648   Mat_Merge_SeqsToMPI  *merge;
4649   PetscContainer       container;
4650 
4651   PetscFunctionBegin;
4652   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4653 
4654   /* make sure it is a PETSc comm */
4655   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4657   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4658 
4659   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4660   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4661 
4662   /* determine row ownership */
4663   /*---------------------------------------------------------*/
4664   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4669   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4670   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4671 
4672   m      = merge->rowmap->n;
4673   M      = merge->rowmap->N;
4674   owners = merge->rowmap->range;
4675 
4676   /* determine the number of messages to send, their lengths */
4677   /*---------------------------------------------------------*/
4678   len_s  = merge->len_s;
4679 
4680   len = 0;  /* length of buf_si[] */
4681   merge->nsend = 0;
4682   for (proc=0; proc<size; proc++){
4683     len_si[proc] = 0;
4684     if (proc == rank){
4685       len_s[proc] = 0;
4686     } else {
4687       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4688       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4689     }
4690     if (len_s[proc]) {
4691       merge->nsend++;
4692       nrows = 0;
4693       for (i=owners[proc]; i<owners[proc+1]; i++){
4694         if (ai[i+1] > ai[i]) nrows++;
4695       }
4696       len_si[proc] = 2*(nrows+1);
4697       len += len_si[proc];
4698     }
4699   }
4700 
4701   /* determine the number and length of messages to receive for ij-structure */
4702   /*-------------------------------------------------------------------------*/
4703   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4704   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4705 
4706   /* post the Irecv of j-structure */
4707   /*-------------------------------*/
4708   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4709   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4710 
4711   /* post the Isend of j-structure */
4712   /*--------------------------------*/
4713   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4714 
4715   for (proc=0, k=0; proc<size; proc++){
4716     if (!len_s[proc]) continue;
4717     i = owners[proc];
4718     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4719     k++;
4720   }
4721 
4722   /* receives and sends of j-structure are complete */
4723   /*------------------------------------------------*/
4724   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4725   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4726 
4727   /* send and recv i-structure */
4728   /*---------------------------*/
4729   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4730   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4731 
4732   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4733   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4734   for (proc=0,k=0; proc<size; proc++){
4735     if (!len_s[proc]) continue;
4736     /* form outgoing message for i-structure:
4737          buf_si[0]:                 nrows to be sent
4738                [1:nrows]:           row index (global)
4739                [nrows+1:2*nrows+1]: i-structure index
4740     */
4741     /*-------------------------------------------*/
4742     nrows = len_si[proc]/2 - 1;
4743     buf_si_i    = buf_si + nrows+1;
4744     buf_si[0]   = nrows;
4745     buf_si_i[0] = 0;
4746     nrows = 0;
4747     for (i=owners[proc]; i<owners[proc+1]; i++){
4748       anzi = ai[i+1] - ai[i];
4749       if (anzi) {
4750         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4751         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4752         nrows++;
4753       }
4754     }
4755     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4756     k++;
4757     buf_si += len_si[proc];
4758   }
4759 
4760   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4761   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4762 
4763   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4764   for (i=0; i<merge->nrecv; i++){
4765     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4766   }
4767 
4768   ierr = PetscFree(len_si);CHKERRQ(ierr);
4769   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4770   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4771   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4772   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4773   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4774   ierr = PetscFree(status);CHKERRQ(ierr);
4775 
4776   /* compute a local seq matrix in each processor */
4777   /*----------------------------------------------*/
4778   /* allocate bi array and free space for accumulating nonzero column info */
4779   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4780   bi[0] = 0;
4781 
4782   /* create and initialize a linked list */
4783   nlnk = N+1;
4784   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4785 
4786   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4787   len = 0;
4788   len  = ai[owners[rank+1]] - ai[owners[rank]];
4789   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4790   current_space = free_space;
4791 
4792   /* determine symbolic info for each local row */
4793   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4794 
4795   for (k=0; k<merge->nrecv; k++){
4796     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4797     nrows = *buf_ri_k[k];
4798     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4799     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4800   }
4801 
4802   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4803   len = 0;
4804   for (i=0;i<m;i++) {
4805     bnzi   = 0;
4806     /* add local non-zero cols of this proc's seqmat into lnk */
4807     arow   = owners[rank] + i;
4808     anzi   = ai[arow+1] - ai[arow];
4809     aj     = a->j + ai[arow];
4810     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4811     bnzi += nlnk;
4812     /* add received col data into lnk */
4813     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4814       if (i == *nextrow[k]) { /* i-th row */
4815         anzi = *(nextai[k]+1) - *nextai[k];
4816         aj   = buf_rj[k] + *nextai[k];
4817         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818         bnzi += nlnk;
4819         nextrow[k]++; nextai[k]++;
4820       }
4821     }
4822     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4823 
4824     /* if free space is not available, make more free space */
4825     if (current_space->local_remaining<bnzi) {
4826       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4827       nspacedouble++;
4828     }
4829     /* copy data into free space, then initialize lnk */
4830     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4831     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4832 
4833     current_space->array           += bnzi;
4834     current_space->local_used      += bnzi;
4835     current_space->local_remaining -= bnzi;
4836 
4837     bi[i+1] = bi[i] + bnzi;
4838   }
4839 
4840   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4841 
4842   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4843   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4844   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4845 
4846   /* create symbolic parallel matrix B_mpi */
4847   /*---------------------------------------*/
4848     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4849   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4850   if (n==PETSC_DECIDE) {
4851     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4852   } else {
4853     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4854   }
4855   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4856   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4857   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4858   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4859   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4860 
4861   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4862   B_mpi->assembled     = PETSC_FALSE;
4863   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4864   merge->bi            = bi;
4865   merge->bj            = bj;
4866   merge->buf_ri        = buf_ri;
4867   merge->buf_rj        = buf_rj;
4868   merge->coi           = PETSC_NULL;
4869   merge->coj           = PETSC_NULL;
4870   merge->owners_co     = PETSC_NULL;
4871 
4872   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4873 
4874   /* attach the supporting struct to B_mpi for reuse */
4875   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4876   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4877   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4878   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4879   *mpimat = B_mpi;
4880 
4881   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4882   PetscFunctionReturn(0);
4883 }
4884 
4885 #undef __FUNCT__
4886 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4887 /*@C
4888       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4889                  matrices from each processor
4890 
4891     Collective on MPI_Comm
4892 
4893    Input Parameters:
4894 +    comm - the communicators the parallel matrix will live on
4895 .    seqmat - the input sequential matrices
4896 .    m - number of local rows (or PETSC_DECIDE)
4897 .    n - number of local columns (or PETSC_DECIDE)
4898 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4899 
4900    Output Parameter:
4901 .    mpimat - the parallel matrix generated
4902 
4903     Level: advanced
4904 
4905    Notes:
4906      The dimensions of the sequential matrix in each processor MUST be the same.
4907      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4908      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4909 @*/
4910 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4911 {
4912   PetscErrorCode   ierr;
4913   PetscMPIInt     size;
4914 
4915   PetscFunctionBegin;
4916   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4917   if (size == 1){
4918      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919      if (scall == MAT_INITIAL_MATRIX){
4920        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4921      } else {
4922        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4923      }
4924      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925      PetscFunctionReturn(0);
4926   }
4927   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4928   if (scall == MAT_INITIAL_MATRIX){
4929     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4930   }
4931   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4932   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4933   PetscFunctionReturn(0);
4934 }
4935 
4936 #undef __FUNCT__
4937 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4938 /*@
4939      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4940           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4941           with MatGetSize()
4942 
4943     Not Collective
4944 
4945    Input Parameters:
4946 +    A - the matrix
4947 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4948 
4949    Output Parameter:
4950 .    A_loc - the local sequential matrix generated
4951 
4952     Level: developer
4953 
4954 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4955 
4956 @*/
4957 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4958 {
4959   PetscErrorCode  ierr;
4960   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4961   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4962   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4963   MatScalar       *aa=a->a,*ba=b->a,*cam;
4964   PetscScalar     *ca;
4965   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4966   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4967   PetscBool       match;
4968 
4969   PetscFunctionBegin;
4970   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4971   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4972   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4973   if (scall == MAT_INITIAL_MATRIX){
4974     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4975     ci[0] = 0;
4976     for (i=0; i<am; i++){
4977       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4978     }
4979     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4980     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4981     k = 0;
4982     for (i=0; i<am; i++) {
4983       ncols_o = bi[i+1] - bi[i];
4984       ncols_d = ai[i+1] - ai[i];
4985       /* off-diagonal portion of A */
4986       for (jo=0; jo<ncols_o; jo++) {
4987         col = cmap[*bj];
4988         if (col >= cstart) break;
4989         cj[k]   = col; bj++;
4990         ca[k++] = *ba++;
4991       }
4992       /* diagonal portion of A */
4993       for (j=0; j<ncols_d; j++) {
4994         cj[k]   = cstart + *aj++;
4995         ca[k++] = *aa++;
4996       }
4997       /* off-diagonal portion of A */
4998       for (j=jo; j<ncols_o; j++) {
4999         cj[k]   = cmap[*bj++];
5000         ca[k++] = *ba++;
5001       }
5002     }
5003     /* put together the new matrix */
5004     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5005     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5006     /* Since these are PETSc arrays, change flags to free them as necessary. */
5007     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5008     mat->free_a  = PETSC_TRUE;
5009     mat->free_ij = PETSC_TRUE;
5010     mat->nonew   = 0;
5011   } else if (scall == MAT_REUSE_MATRIX){
5012     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5013     ci = mat->i; cj = mat->j; cam = mat->a;
5014     for (i=0; i<am; i++) {
5015       /* off-diagonal portion of A */
5016       ncols_o = bi[i+1] - bi[i];
5017       for (jo=0; jo<ncols_o; jo++) {
5018         col = cmap[*bj];
5019         if (col >= cstart) break;
5020         *cam++ = *ba++; bj++;
5021       }
5022       /* diagonal portion of A */
5023       ncols_d = ai[i+1] - ai[i];
5024       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5025       /* off-diagonal portion of A */
5026       for (j=jo; j<ncols_o; j++) {
5027         *cam++ = *ba++; bj++;
5028       }
5029     }
5030   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5031   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5032   PetscFunctionReturn(0);
5033 }
5034 
5035 #undef __FUNCT__
5036 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5037 /*@C
5038      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5039 
5040     Not Collective
5041 
5042    Input Parameters:
5043 +    A - the matrix
5044 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5045 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5046 
5047    Output Parameter:
5048 .    A_loc - the local sequential matrix generated
5049 
5050     Level: developer
5051 
5052 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5053 
5054 @*/
5055 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5056 {
5057   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5058   PetscErrorCode    ierr;
5059   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5060   IS                isrowa,iscola;
5061   Mat               *aloc;
5062   PetscBool       match;
5063 
5064   PetscFunctionBegin;
5065   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5066   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5067   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5068   if (!row){
5069     start = A->rmap->rstart; end = A->rmap->rend;
5070     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5071   } else {
5072     isrowa = *row;
5073   }
5074   if (!col){
5075     start = A->cmap->rstart;
5076     cmap  = a->garray;
5077     nzA   = a->A->cmap->n;
5078     nzB   = a->B->cmap->n;
5079     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5080     ncols = 0;
5081     for (i=0; i<nzB; i++) {
5082       if (cmap[i] < start) idx[ncols++] = cmap[i];
5083       else break;
5084     }
5085     imark = i;
5086     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5087     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5088     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5089   } else {
5090     iscola = *col;
5091   }
5092   if (scall != MAT_INITIAL_MATRIX){
5093     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5094     aloc[0] = *A_loc;
5095   }
5096   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5097   *A_loc = aloc[0];
5098   ierr = PetscFree(aloc);CHKERRQ(ierr);
5099   if (!row){
5100     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5101   }
5102   if (!col){
5103     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5104   }
5105   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5106   PetscFunctionReturn(0);
5107 }
5108 
5109 #undef __FUNCT__
5110 #define __FUNCT__ "MatGetBrowsOfAcols"
5111 /*@C
5112     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5113 
5114     Collective on Mat
5115 
5116    Input Parameters:
5117 +    A,B - the matrices in mpiaij format
5118 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5119 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5120 
5121    Output Parameter:
5122 +    rowb, colb - index sets of rows and columns of B to extract
5123 -    B_seq - the sequential matrix generated
5124 
5125     Level: developer
5126 
5127 @*/
5128 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5129 {
5130   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5131   PetscErrorCode    ierr;
5132   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5133   IS                isrowb,iscolb;
5134   Mat               *bseq=PETSC_NULL;
5135 
5136   PetscFunctionBegin;
5137   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5138     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5139   }
5140   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5141 
5142   if (scall == MAT_INITIAL_MATRIX){
5143     start = A->cmap->rstart;
5144     cmap  = a->garray;
5145     nzA   = a->A->cmap->n;
5146     nzB   = a->B->cmap->n;
5147     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5148     ncols = 0;
5149     for (i=0; i<nzB; i++) {  /* row < local row index */
5150       if (cmap[i] < start) idx[ncols++] = cmap[i];
5151       else break;
5152     }
5153     imark = i;
5154     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5155     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5156     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5157     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5158   } else {
5159     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5160     isrowb = *rowb; iscolb = *colb;
5161     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5162     bseq[0] = *B_seq;
5163   }
5164   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5165   *B_seq = bseq[0];
5166   ierr = PetscFree(bseq);CHKERRQ(ierr);
5167   if (!rowb){
5168     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5169   } else {
5170     *rowb = isrowb;
5171   }
5172   if (!colb){
5173     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5174   } else {
5175     *colb = iscolb;
5176   }
5177   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5178   PetscFunctionReturn(0);
5179 }
5180 
5181 #undef __FUNCT__
5182 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5183 /*
5184     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5185     of the OFF-DIAGONAL portion of local A
5186 
5187     Collective on Mat
5188 
5189    Input Parameters:
5190 +    A,B - the matrices in mpiaij format
5191 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5192 
5193    Output Parameter:
5194 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5195 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5196 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5197 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5198 
5199     Level: developer
5200 
5201 */
5202 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5203 {
5204   VecScatter_MPI_General *gen_to,*gen_from;
5205   PetscErrorCode         ierr;
5206   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5207   Mat_SeqAIJ             *b_oth;
5208   VecScatter             ctx=a->Mvctx;
5209   MPI_Comm               comm=((PetscObject)ctx)->comm;
5210   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5211   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5212   PetscScalar            *rvalues,*svalues;
5213   MatScalar              *b_otha,*bufa,*bufA;
5214   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5215   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5216   MPI_Status             *sstatus,rstatus;
5217   PetscMPIInt            jj;
5218   PetscInt               *cols,sbs,rbs;
5219   PetscScalar            *vals;
5220 
5221   PetscFunctionBegin;
5222   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5223     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5224   }
5225   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5226   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5227 
5228   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5229   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5230   rvalues  = gen_from->values; /* holds the length of receiving row */
5231   svalues  = gen_to->values;   /* holds the length of sending row */
5232   nrecvs   = gen_from->n;
5233   nsends   = gen_to->n;
5234 
5235   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5236   srow     = gen_to->indices;   /* local row index to be sent */
5237   sstarts  = gen_to->starts;
5238   sprocs   = gen_to->procs;
5239   sstatus  = gen_to->sstatus;
5240   sbs      = gen_to->bs;
5241   rstarts  = gen_from->starts;
5242   rprocs   = gen_from->procs;
5243   rbs      = gen_from->bs;
5244 
5245   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5246   if (scall == MAT_INITIAL_MATRIX){
5247     /* i-array */
5248     /*---------*/
5249     /*  post receives */
5250     for (i=0; i<nrecvs; i++){
5251       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5252       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5253       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5254     }
5255 
5256     /* pack the outgoing message */
5257     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5258     sstartsj[0] = 0;  rstartsj[0] = 0;
5259     len = 0; /* total length of j or a array to be sent */
5260     k = 0;
5261     for (i=0; i<nsends; i++){
5262       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5263       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5264       for (j=0; j<nrows; j++) {
5265         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5266         for (l=0; l<sbs; l++){
5267           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5268           rowlen[j*sbs+l] = ncols;
5269           len += ncols;
5270           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5271         }
5272         k++;
5273       }
5274       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5275       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5276     }
5277     /* recvs and sends of i-array are completed */
5278     i = nrecvs;
5279     while (i--) {
5280       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5281     }
5282     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5283 
5284     /* allocate buffers for sending j and a arrays */
5285     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5286     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5287 
5288     /* create i-array of B_oth */
5289     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5290     b_othi[0] = 0;
5291     len = 0; /* total length of j or a array to be received */
5292     k = 0;
5293     for (i=0; i<nrecvs; i++){
5294       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5295       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5296       for (j=0; j<nrows; j++) {
5297         b_othi[k+1] = b_othi[k] + rowlen[j];
5298         len += rowlen[j]; k++;
5299       }
5300       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5301     }
5302 
5303     /* allocate space for j and a arrrays of B_oth */
5304     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5305     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5306 
5307     /* j-array */
5308     /*---------*/
5309     /*  post receives of j-array */
5310     for (i=0; i<nrecvs; i++){
5311       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5312       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5313     }
5314 
5315     /* pack the outgoing message j-array */
5316     k = 0;
5317     for (i=0; i<nsends; i++){
5318       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5319       bufJ = bufj+sstartsj[i];
5320       for (j=0; j<nrows; j++) {
5321         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5322         for (ll=0; ll<sbs; ll++){
5323           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5324           for (l=0; l<ncols; l++){
5325             *bufJ++ = cols[l];
5326           }
5327           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5328         }
5329       }
5330       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5331     }
5332 
5333     /* recvs and sends of j-array are completed */
5334     i = nrecvs;
5335     while (i--) {
5336       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5337     }
5338     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5339   } else if (scall == MAT_REUSE_MATRIX){
5340     sstartsj = *startsj_s;
5341     rstartsj = *startsj_r;
5342     bufa     = *bufa_ptr;
5343     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5344     b_otha   = b_oth->a;
5345   } else {
5346     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5347   }
5348 
5349   /* a-array */
5350   /*---------*/
5351   /*  post receives of a-array */
5352   for (i=0; i<nrecvs; i++){
5353     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5354     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5355   }
5356 
5357   /* pack the outgoing message a-array */
5358   k = 0;
5359   for (i=0; i<nsends; i++){
5360     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5361     bufA = bufa+sstartsj[i];
5362     for (j=0; j<nrows; j++) {
5363       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5364       for (ll=0; ll<sbs; ll++){
5365         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5366         for (l=0; l<ncols; l++){
5367           *bufA++ = vals[l];
5368         }
5369         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5370       }
5371     }
5372     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5373   }
5374   /* recvs and sends of a-array are completed */
5375   i = nrecvs;
5376   while (i--) {
5377     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5378   }
5379   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5380   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5381 
5382   if (scall == MAT_INITIAL_MATRIX){
5383     /* put together the new matrix */
5384     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5385 
5386     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5387     /* Since these are PETSc arrays, change flags to free them as necessary. */
5388     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5389     b_oth->free_a  = PETSC_TRUE;
5390     b_oth->free_ij = PETSC_TRUE;
5391     b_oth->nonew   = 0;
5392 
5393     ierr = PetscFree(bufj);CHKERRQ(ierr);
5394     if (!startsj_s || !bufa_ptr){
5395       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5396       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5397     } else {
5398       *startsj_s = sstartsj;
5399       *startsj_r = rstartsj;
5400       *bufa_ptr  = bufa;
5401     }
5402   }
5403   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5404   PetscFunctionReturn(0);
5405 }
5406 
5407 #undef __FUNCT__
5408 #define __FUNCT__ "MatGetCommunicationStructs"
5409 /*@C
5410   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5411 
5412   Not Collective
5413 
5414   Input Parameters:
5415 . A - The matrix in mpiaij format
5416 
5417   Output Parameter:
5418 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5419 . colmap - A map from global column index to local index into lvec
5420 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5421 
5422   Level: developer
5423 
5424 @*/
5425 #if defined (PETSC_USE_CTABLE)
5426 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5427 #else
5428 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5429 #endif
5430 {
5431   Mat_MPIAIJ *a;
5432 
5433   PetscFunctionBegin;
5434   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5435   PetscValidPointer(lvec, 2);
5436   PetscValidPointer(colmap, 3);
5437   PetscValidPointer(multScatter, 4);
5438   a = (Mat_MPIAIJ *) A->data;
5439   if (lvec) *lvec = a->lvec;
5440   if (colmap) *colmap = a->colmap;
5441   if (multScatter) *multScatter = a->Mvctx;
5442   PetscFunctionReturn(0);
5443 }
5444 
5445 EXTERN_C_BEGIN
5446 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5447 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5448 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5449 EXTERN_C_END
5450 
5451 #undef __FUNCT__
5452 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5453 /*
5454     Computes (B'*A')' since computing B*A directly is untenable
5455 
5456                n                       p                          p
5457         (              )       (              )         (                  )
5458       m (      A       )  *  n (       B      )   =   m (         C        )
5459         (              )       (              )         (                  )
5460 
5461 */
5462 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5463 {
5464   PetscErrorCode     ierr;
5465   Mat                At,Bt,Ct;
5466 
5467   PetscFunctionBegin;
5468   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5469   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5470   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5471   ierr = MatDestroy(&At);CHKERRQ(ierr);
5472   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5473   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5474   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5475   PetscFunctionReturn(0);
5476 }
5477 
5478 #undef __FUNCT__
5479 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5480 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5481 {
5482   PetscErrorCode ierr;
5483   PetscInt       m=A->rmap->n,n=B->cmap->n;
5484   Mat            Cmat;
5485 
5486   PetscFunctionBegin;
5487   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5488   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5489   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5490   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5491   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5492   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5493   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5494   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5495   *C   = Cmat;
5496   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5497   PetscFunctionReturn(0);
5498 }
5499 
5500 /* ----------------------------------------------------------------*/
5501 #undef __FUNCT__
5502 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5503 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5504 {
5505   PetscErrorCode ierr;
5506 
5507   PetscFunctionBegin;
5508   if (scall == MAT_INITIAL_MATRIX){
5509     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5510   }
5511   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5512   PetscFunctionReturn(0);
5513 }
5514 
5515 EXTERN_C_BEGIN
5516 #if defined(PETSC_HAVE_MUMPS)
5517 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5518 #endif
5519 #if defined(PETSC_HAVE_PASTIX)
5520 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5521 #endif
5522 #if defined(PETSC_HAVE_SUPERLU_DIST)
5523 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5524 #endif
5525 #if defined(PETSC_HAVE_SPOOLES)
5526 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5527 #endif
5528 EXTERN_C_END
5529 
5530 /*MC
5531    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5532 
5533    Options Database Keys:
5534 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5535 
5536   Level: beginner
5537 
5538 .seealso: MatCreateAIJ()
5539 M*/
5540 
5541 EXTERN_C_BEGIN
5542 #undef __FUNCT__
5543 #define __FUNCT__ "MatCreate_MPIAIJ"
5544 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5545 {
5546   Mat_MPIAIJ     *b;
5547   PetscErrorCode ierr;
5548   PetscMPIInt    size;
5549 
5550   PetscFunctionBegin;
5551   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5552   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5553   B->data         = (void*)b;
5554   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5555   B->assembled    = PETSC_FALSE;
5556   B->insertmode   = NOT_SET_VALUES;
5557   b->size         = size;
5558   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5559 
5560   /* build cache for off array entries formed */
5561   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5562   b->donotstash  = PETSC_FALSE;
5563   b->colmap      = 0;
5564   b->garray      = 0;
5565   b->roworiented = PETSC_TRUE;
5566 
5567   /* stuff used for matrix vector multiply */
5568   b->lvec      = PETSC_NULL;
5569   b->Mvctx     = PETSC_NULL;
5570 
5571   /* stuff for MatGetRow() */
5572   b->rowindices   = 0;
5573   b->rowvalues    = 0;
5574   b->getrowactive = PETSC_FALSE;
5575 
5576   /* flexible pointer used in CUSP/CUSPARSE classes */
5577   b->spptr        = PETSC_NULL;
5578 #if defined(PETSC_HAVE_SPOOLES)
5579   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5580                                      "MatGetFactor_mpiaij_spooles",
5581                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5582 #endif
5583 #if defined(PETSC_HAVE_MUMPS)
5584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5585                                      "MatGetFactor_aij_mumps",
5586                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_PASTIX)
5589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5590 					   "MatGetFactor_mpiaij_pastix",
5591 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5592 #endif
5593 #if defined(PETSC_HAVE_SUPERLU_DIST)
5594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5595                                      "MatGetFactor_mpiaij_superlu_dist",
5596                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5597 #endif
5598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5599                                      "MatStoreValues_MPIAIJ",
5600                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5602                                      "MatRetrieveValues_MPIAIJ",
5603                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5605 				     "MatGetDiagonalBlock_MPIAIJ",
5606                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5608 				     "MatIsTranspose_MPIAIJ",
5609 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5611 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5612 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5614 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5615 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5617 				     "MatDiagonalScaleLocal_MPIAIJ",
5618 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5620                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5621                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5623                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5624                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5626                                      "MatConvert_MPIAIJ_MPISBAIJ",
5627                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5629                                      "MatMatMult_MPIDense_MPIAIJ",
5630                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5632                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5633                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5635                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5636                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5637   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5638   PetscFunctionReturn(0);
5639 }
5640 EXTERN_C_END
5641 
5642 #undef __FUNCT__
5643 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5644 /*@
5645      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5646          and "off-diagonal" part of the matrix in CSR format.
5647 
5648    Collective on MPI_Comm
5649 
5650    Input Parameters:
5651 +  comm - MPI communicator
5652 .  m - number of local rows (Cannot be PETSC_DECIDE)
5653 .  n - This value should be the same as the local size used in creating the
5654        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5655        calculated if N is given) For square matrices n is almost always m.
5656 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5657 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5658 .   i - row indices for "diagonal" portion of matrix
5659 .   j - column indices
5660 .   a - matrix values
5661 .   oi - row indices for "off-diagonal" portion of matrix
5662 .   oj - column indices
5663 -   oa - matrix values
5664 
5665    Output Parameter:
5666 .   mat - the matrix
5667 
5668    Level: advanced
5669 
5670    Notes:
5671        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5672        must free the arrays once the matrix has been destroyed and not before.
5673 
5674        The i and j indices are 0 based
5675 
5676        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5677 
5678        This sets local rows and cannot be used to set off-processor values.
5679 
5680        You cannot later use MatSetValues() to change values in this matrix.
5681 
5682 .keywords: matrix, aij, compressed row, sparse, parallel
5683 
5684 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5685           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5686 @*/
5687 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5688 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5689 {
5690   PetscErrorCode ierr;
5691   Mat_MPIAIJ     *maij;
5692 
5693  PetscFunctionBegin;
5694   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5695   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5696   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5697   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5698   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5699   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5700   maij = (Mat_MPIAIJ*) (*mat)->data;
5701   maij->donotstash     = PETSC_TRUE;
5702   (*mat)->preallocated = PETSC_TRUE;
5703 
5704   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5705   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5706 
5707   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5708   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5709 
5710   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5711   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5712   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5713   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5714 
5715   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5716   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5717   PetscFunctionReturn(0);
5718 }
5719 
5720 /*
5721     Special version for direct calls from Fortran
5722 */
5723 #include <petsc-private/fortranimpl.h>
5724 
5725 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5726 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5727 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5728 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5729 #endif
5730 
5731 /* Change these macros so can be used in void function */
5732 #undef CHKERRQ
5733 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5734 #undef SETERRQ2
5735 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5736 #undef SETERRQ3
5737 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5738 #undef SETERRQ
5739 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5740 
5741 EXTERN_C_BEGIN
5742 #undef __FUNCT__
5743 #define __FUNCT__ "matsetvaluesmpiaij_"
5744 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5745 {
5746   Mat             mat = *mmat;
5747   PetscInt        m = *mm, n = *mn;
5748   InsertMode      addv = *maddv;
5749   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5750   PetscScalar     value;
5751   PetscErrorCode  ierr;
5752 
5753   MatCheckPreallocated(mat,1);
5754   if (mat->insertmode == NOT_SET_VALUES) {
5755     mat->insertmode = addv;
5756   }
5757 #if defined(PETSC_USE_DEBUG)
5758   else if (mat->insertmode != addv) {
5759     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5760   }
5761 #endif
5762   {
5763   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5764   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5765   PetscBool       roworiented = aij->roworiented;
5766 
5767   /* Some Variables required in the macro */
5768   Mat             A = aij->A;
5769   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5770   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5771   MatScalar       *aa = a->a;
5772   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5773   Mat             B = aij->B;
5774   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5775   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5776   MatScalar       *ba = b->a;
5777 
5778   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5779   PetscInt        nonew = a->nonew;
5780   MatScalar       *ap1,*ap2;
5781 
5782   PetscFunctionBegin;
5783   for (i=0; i<m; i++) {
5784     if (im[i] < 0) continue;
5785 #if defined(PETSC_USE_DEBUG)
5786     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5787 #endif
5788     if (im[i] >= rstart && im[i] < rend) {
5789       row      = im[i] - rstart;
5790       lastcol1 = -1;
5791       rp1      = aj + ai[row];
5792       ap1      = aa + ai[row];
5793       rmax1    = aimax[row];
5794       nrow1    = ailen[row];
5795       low1     = 0;
5796       high1    = nrow1;
5797       lastcol2 = -1;
5798       rp2      = bj + bi[row];
5799       ap2      = ba + bi[row];
5800       rmax2    = bimax[row];
5801       nrow2    = bilen[row];
5802       low2     = 0;
5803       high2    = nrow2;
5804 
5805       for (j=0; j<n; j++) {
5806         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5807         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5808         if (in[j] >= cstart && in[j] < cend){
5809           col = in[j] - cstart;
5810           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5811         } else if (in[j] < 0) continue;
5812 #if defined(PETSC_USE_DEBUG)
5813         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5814 #endif
5815         else {
5816           if (mat->was_assembled) {
5817             if (!aij->colmap) {
5818               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5819             }
5820 #if defined (PETSC_USE_CTABLE)
5821             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5822 	    col--;
5823 #else
5824             col = aij->colmap[in[j]] - 1;
5825 #endif
5826             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5827               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5828               col =  in[j];
5829               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5830               B = aij->B;
5831               b = (Mat_SeqAIJ*)B->data;
5832               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5833               rp2      = bj + bi[row];
5834               ap2      = ba + bi[row];
5835               rmax2    = bimax[row];
5836               nrow2    = bilen[row];
5837               low2     = 0;
5838               high2    = nrow2;
5839               bm       = aij->B->rmap->n;
5840               ba = b->a;
5841             }
5842           } else col = in[j];
5843           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5844         }
5845       }
5846     } else {
5847       if (!aij->donotstash) {
5848         if (roworiented) {
5849           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5850         } else {
5851           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5852         }
5853       }
5854     }
5855   }}
5856   PetscFunctionReturnVoid();
5857 }
5858 EXTERN_C_END
5859 
5860