xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 196cc216ae9548808fc2ef4c2e514bddc4813f7d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
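
/*
   A minimal usage sketch (not part of the original source; the helper name, local sizes,
   and the per-row nonzero estimates 5 and 2 are placeholder values) illustrating the
   recommendation above: set the type to MATAIJ and call both preallocation routines so
   the same code works on a one-process communicator (MATSEQAIJ) and on a multi-process
   communicator (MATMPIAIJ).
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateAIJ_Sketch"
static PetscErrorCode ExampleCreateAIJ_Sketch(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);                                   /* honors -mat_type aij */
  ierr = MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr);              /* used when the communicator has one process */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); /* used when it has several */
  PetscFunctionReturn(0);
}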
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices.  (A usage sketch follows this routine.)
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the diagonal and off-diagonal nonzero counts */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the diagonal and off-diagonal nonzero counts */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
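
/*
   A minimal usage sketch (not part of the original source; the helper name is
   hypothetical) for MatDistribute_MPIAIJ() above: gmat is a square MATSEQAIJ matrix
   whose entries live on process 0, and m is the number of rows this process should own
   in the distributed copy.  A second call with MAT_REUSE_MATRIX moves over new numerical
   values only, assuming the nonzero pattern of gmat is unchanged.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleDistribute_Sketch"
static PetscErrorCode ExampleDistribute_Sketch(MPI_Comm comm,Mat gmat,PetscInt m,Mat *dmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,dmat);CHKERRQ(ierr);
  /* ... update the numerical values of gmat on process 0, keeping its nonzero pattern ... */
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,dmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}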
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it, it is not scalable (each process
331 has an order N integer array, which is fast to access).  A lookup sketch follows this routine.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
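
/*
   A minimal lookup sketch (not part of the original source; the helper name is
   hypothetical): given the colmap built above, translate a global column index gcol
   into the local column index of the off-diagonal block B, or -1 if gcol does not occur
   in B on this process.  This mirrors the lookups performed in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleColmapLookup_Sketch"
static PetscErrorCode ExampleColmapLookup_Sketch(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
#if defined (PETSC_USE_CTABLE)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr); /* keys and values are stored shifted by one */
  (*lcol)--;                                                    /* so 0 can mean "column not present in B" */
#else
  *lcol = aij->colmap[gcol] - 1;                                /* the order-N array stores local index + 1 */
#endif
  PetscFunctionReturn(0);
}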
356 
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselves, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal, we use separate
842      code so that MatSetValues() is not called for each diagonal entry, which would
843      allocate new memory, causing lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added into yy until the VecScatterEnd() on the next line */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes MatGetSubMatrices() calls. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195    PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302   FILE              *file;
1303 
1304   PetscFunctionBegin;
1305   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1307   nz   = A->nz + B->nz;
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) {
1319       rlen = PetscMax(rlen,range[i+1] - range[i]);
1320     }
1321   } else {
1322     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1323     rlen = mat->rmap->n;
1324   }
1325 
1326   /* load up the local row counts */
1327   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1328   for (i=0; i<mat->rmap->n; i++) {
1329     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1330   }
1331 
1332   /* store the row lengths to the file */
1333   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1334   if (!rank) {
1335     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1338       rlen = range[i+1] - range[i];
1339       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1345     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1349 
1350   /* load up the local column indices */
1351   nzmax = nz; /* the 0th process needs as much space as the largest process requires */
1352   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1353   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1354   cnt  = 0;
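  /*
     Merge each local row into globally ordered column indices: first the off-diagonal (B)
     entries whose global column lies before this process's diagonal block, then the diagonal
     (A) entries shifted by cstart, then the remaining B entries.  A and B store their columns
     sorted, so each merged row is sorted as well.
  */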
1355   for (i=0; i<mat->rmap->n; i++) {
1356     for (j=B->i[i]; j<B->i[i+1]; j++) {
1357       if ( (col = garray[B->j[j]]) > cstart) break;
1358       column_indices[cnt++] = col;
1359     }
1360     for (k=A->i[i]; k<A->i[i+1]; k++) {
1361       column_indices[cnt++] = A->j[k] + cstart;
1362     }
1363     for (; j<B->i[i+1]; j++) {
1364       column_indices[cnt++] = garray[B->j[j]];
1365     }
1366   }
1367   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368 
1369   /* store the column indices to the file */
1370   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1371   if (!rank) {
1372     MPI_Status status;
1373     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     for (i=1; i<size; i++) {
1375       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1376       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1377       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1378       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1382   } else {
1383     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1384     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1386     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1387   }
1388   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1389 
1390   /* load up the local column values */
1391   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1392   cnt  = 0;
1393   for (i=0; i<mat->rmap->n; i++) {
1394     for (j=B->i[i]; j<B->i[i+1]; j++) {
1395       if ( garray[B->j[j]] > cstart) break;
1396       column_values[cnt++] = B->a[j];
1397     }
1398     for (k=A->i[i]; k<A->i[i+1]; k++) {
1399       column_values[cnt++] = A->a[k];
1400     }
1401     for (; j<B->i[i+1]; j++) {
1402       column_values[cnt++] = B->a[j];
1403     }
1404   }
1405   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406 
1407   /* store the column values to the file */
1408   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1409   if (!rank) {
1410     MPI_Status status;
1411     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     for (i=1; i<size; i++) {
1413       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1414       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1415       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1416       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1420   } else {
1421     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1422     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1425   }
1426   ierr = PetscFree(column_values);CHKERRQ(ierr);
1427 
1428   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1429   if (file) {
1430     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
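/*
   Editor's illustrative sketch (not part of the original source): how the binary path above is
   normally reached from user code.  Only standard PETSc calls are used; the file name is a
   placeholder.  Guarded by #if 0 so it is never compiled as part of this file.
*/
#if 0
static PetscErrorCode ExampleSaveMPIAIJBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);      /* for a parallel MATAIJ this ends up in MatView_MPIAIJ_Binary() */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif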
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1439 {
1440   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1441   PetscErrorCode    ierr;
1442   PetscMPIInt       rank = aij->rank,size = aij->size;
1443   PetscBool         isdraw,iascii,isbinary;
1444   PetscViewer       sviewer;
1445   PetscViewerFormat format;
1446 
1447   PetscFunctionBegin;
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1451   if (iascii) {
1452     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1453     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1454       MatInfo    info;
1455       PetscBool  inodes;
1456 
1457       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1458       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1461       if (!inodes) {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1463                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1464       } else {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1466                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       }
1468       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1472       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1475       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1478       PetscInt   inodecount,inodelimit,*inodes;
1479       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1480       if (inodes) {
1481         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1482       } else {
1483         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1484       }
1485       PetscFunctionReturn(0);
1486     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1487       PetscFunctionReturn(0);
1488     }
1489   } else if (isbinary) {
1490     if (size == 1) {
1491       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1492       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1493     } else {
1494       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1495     }
1496     PetscFunctionReturn(0);
1497   } else if (isdraw) {
1498     PetscDraw  draw;
1499     PetscBool  isnull;
1500     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1501     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1502   }
1503 
1504   if (size == 1) {
1505     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507   } else {
1508     /* assemble the entire matrix onto first processor. */
1509     Mat         A;
1510     Mat_SeqAIJ  *Aloc;
1511     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1512     MatScalar   *a;
1513 
1514     if (mat->rmap->N > 1024) {
1515       PetscBool  flg = PETSC_FALSE;
1516 
1517       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1518       if (!flg) {
1519         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1520       }
1521     }
1522 
1523     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1524     if (!rank) {
1525       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1526     } else {
1527       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1528     }
1529     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1530     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1531     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1532     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1533     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1534 
1535     /* copy over the A part */
1536     Aloc = (Mat_SeqAIJ*)aij->A->data;
1537     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row = mat->rmap->rstart;
1539     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1543     }
1544     aj = Aloc->j;
1545     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1546 
1547     /* copy over the B part */
1548     Aloc = (Mat_SeqAIJ*)aij->B->data;
1549     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550     row  = mat->rmap->rstart;
1551     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552     ct   = cols;
1553     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1554     for (i=0; i<m; i++) {
1555       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1556       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1557     }
1558     ierr = PetscFree(ct);CHKERRQ(ierr);
1559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     /*
1562        Everyone has to call to draw the matrix since the graphics waits are
1563        synchronized across all processors that share the PetscDraw object
1564     */
1565     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     if (!rank) {
1567       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1568       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1569       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1570       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1571     }
1572     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     ierr = MatDestroy(&A);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatView_MPIAIJ"
1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1581 {
1582   PetscErrorCode ierr;
1583   PetscBool      iascii,isdraw,issocket,isbinary;
1584 
1585   PetscFunctionBegin;
1586   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1587   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1590   if (iascii || isdraw || isbinary || issocket) {
1591     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1594   }
1595   PetscFunctionReturn(0);
1596 }
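/*
   Editor's illustrative sketch (not part of the original source): requesting the
   PETSC_VIEWER_ASCII_INFO_DETAIL output handled by MatView_MPIAIJ_ASCIIorDraworSocket() above.
   Guarded by #if 0; only standard PETSc calls are used.
*/
#if 0
static PetscErrorCode ExampleViewInfoDetail(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer = PETSC_VIEWER_STDOUT_(((PetscObject)A)->comm);

  PetscFunctionBegin;
  ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif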
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
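  /* a work vector bb1 is needed unless a single sweep with a zero initial guess (and without
     Eisenstat) is requested; note that ~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when
     that bit is not set */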
1608   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1610   }
1611 
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   }  else if (flag & SOR_EISENSTAT) {
1667     Vec         xx1;
1668 
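    /* Eisenstat's trick: a local backward sweep produces xx, the ghost values of xx are
       gathered into lvec, the right-hand side is modified using the (block) diagonal and the
       off-diagonal product, and a local forward sweep yields the correction xx1 added to xx */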
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
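/*
   Editor's illustrative sketch (not part of the original source): the usual way MatSOR_MPIAIJ()
   is exercised, through a PCSOR preconditioner.  Assumes petscksp.h is included and the
   four-argument KSPSetOperators() of this PETSc generation; guarded by #if 0.
*/
#if 0
static PetscErrorCode ExampleLocalSORSolve(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;
  KSP            ksp;
  PC             pc;

  PetscFunctionBegin;
  ierr = KSPCreate(((PetscObject)A)->comm,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
  ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);  /* the SOR_LOCAL_SYMMETRIC_SWEEP branch above */
  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif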
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   MPI_Comm       comm;
1703   PetscInt       first,local_rowsize,local_colsize;
1704   const PetscInt *rows;
1705   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1710   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1711   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1712   /* collect the global row permutation and invert it */
1713   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1714   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1715   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1718   /* get the local target indices */
1719   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1721   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1723   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1724   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1725   /* the column permutation is so much easier;
1726      make a local version of 'colp' and invert it */
1727   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1728   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1729   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1731   /* now we just get the submatrix */
1732   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1733   /* clean up */
1734   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1735   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscReal      isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size     = 1.0;
1750   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1755   isend[3] += info->memory;  isend[4] += info->mallocs;
1756   if (flag == MAT_LOCAL) {
1757     info->nz_used      = isend[0];
1758     info->nz_allocated = isend[1];
1759     info->nz_unneeded  = isend[2];
1760     info->memory       = isend[3];
1761     info->mallocs      = isend[4];
1762   } else if (flag == MAT_GLOBAL_MAX) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   } else if (flag == MAT_GLOBAL_SUM) {
1770     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   }
1777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1778   info->fill_ratio_needed = 0;
1779   info->factor_mallocs    = 0;
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatSetOption_MPIAIJ"
1785 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1786 {
1787   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1788   PetscErrorCode ierr;
1789 
1790   PetscFunctionBegin;
1791   switch (op) {
1792   case MAT_NEW_NONZERO_LOCATIONS:
1793   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1794   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1795   case MAT_KEEP_NONZERO_PATTERN:
1796   case MAT_NEW_NONZERO_LOCATION_ERR:
1797   case MAT_USE_INODES:
1798   case MAT_IGNORE_ZERO_ENTRIES:
1799     MatCheckPreallocated(A,1);
1800     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1801     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1802     break;
1803   case MAT_ROW_ORIENTED:
1804     a->roworiented = flg;
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_NEW_DIAGONALS:
1809     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1810     break;
1811   case MAT_IGNORE_OFF_PROC_ENTRIES:
1812     a->donotstash = flg;
1813     break;
1814   case MAT_SPD:
1815     A->spd_set                         = PETSC_TRUE;
1816     A->spd                             = flg;
1817     if (flg) {
1818       A->symmetric                     = PETSC_TRUE;
1819       A->structurally_symmetric        = PETSC_TRUE;
1820       A->symmetric_set                 = PETSC_TRUE;
1821       A->structurally_symmetric_set    = PETSC_TRUE;
1822     }
1823     break;
1824   case MAT_SYMMETRIC:
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1829     break;
1830   case MAT_HERMITIAN:
1831     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1832     break;
1833   case MAT_SYMMETRY_ETERNAL:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   default:
1837     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1838   }
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatGetRow_MPIAIJ"
1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1845 {
1846   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1847   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1848   PetscErrorCode ierr;
1849   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1850   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1851   PetscInt       *cmap,*idx_p;
1852 
1853   PetscFunctionBegin;
1854   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1855   mat->getrowactive = PETSC_TRUE;
1856 
1857   if (!mat->rowvalues && (idx || v)) {
1858     /*
1859         allocate enough space to hold information from the longest row.
1860     */
1861     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1862     PetscInt   max = 1,tmp;
1863     for (i=0; i<matin->rmap->n; i++) {
1864       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1865       if (max < tmp) { max = tmp; }
1866     }
1867     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1868   }
1869 
1870   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1871   lrow = row - rstart;
1872 
1873   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1874   if (!v)   {pvA = 0; pvB = 0;}
1875   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1876   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1877   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1878   nztot = nzA + nzB;
1879 
1880   cmap  = mat->garray;
1881   if (v  || idx) {
1882     if (nztot) {
1883       /* Sort by increasing column numbers, assuming A and B already sorted */
1884       PetscInt imark = -1;
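      /* imark ends up counting the leading B (off-diagonal) entries whose global column lies
         before the diagonal block; the A entries are placed after them and the remaining B
         entries last, so the merged row is in increasing global column order */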
1885       if (v) {
1886         *v = v_p = mat->rowvalues;
1887         for (i=0; i<nzB; i++) {
1888           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1889           else break;
1890         }
1891         imark = i;
1892         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1893         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1894       }
1895       if (idx) {
1896         *idx = idx_p = mat->rowindices;
1897         if (imark > -1) {
1898           for (i=0; i<imark; i++) {
1899             idx_p[i] = cmap[cworkB[i]];
1900           }
1901         } else {
1902           for (i=0; i<nzB; i++) {
1903             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1904             else break;
1905           }
1906           imark = i;
1907         }
1908         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1909         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1910       }
1911     } else {
1912       if (idx) *idx = 0;
1913       if (v)   *v   = 0;
1914     }
1915   }
1916   *nz = nztot;
1917   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1918   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1924 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1925 {
1926   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1927 
1928   PetscFunctionBegin;
1929   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1930   aij->getrowactive = PETSC_FALSE;
1931   PetscFunctionReturn(0);
1932 }
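/*
   Editor's illustrative sketch (not part of the original source): traversing the locally owned
   rows with MatGetRow()/MatRestoreRow(), which dispatch to the two routines above for MPIAIJ.
   Guarded by #if 0; only standard PETSc calls are used.
*/
#if 0
static PetscErrorCode ExampleRowTraversal(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          rstart,rend,row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... use the ncols global column indices and values of this row ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif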
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatNorm_MPIAIJ"
1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1937 {
1938   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1939   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1940   PetscErrorCode ierr;
1941   PetscInt       i,j,cstart = mat->cmap->rstart;
1942   PetscReal      sum = 0.0;
1943   MatScalar      *v;
1944 
1945   PetscFunctionBegin;
1946   if (aij->size == 1) {
1947     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1948   } else {
1949     if (type == NORM_FROBENIUS) {
1950       v = amat->a;
1951       for (i=0; i<amat->nz; i++) {
1952 #if defined(PETSC_USE_COMPLEX)
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954 #else
1955         sum += (*v)*(*v); v++;
1956 #endif
1957       }
1958       v = bmat->a;
1959       for (i=0; i<bmat->nz; i++) {
1960 #if defined(PETSC_USE_COMPLEX)
1961         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1962 #else
1963         sum += (*v)*(*v); v++;
1964 #endif
1965       }
1966       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1967       *norm = PetscSqrtReal(*norm);
1968     } else if (type == NORM_1) { /* max column norm */
1969       PetscReal *tmp,*tmp2;
1970       PetscInt  *jj,*garray = aij->garray;
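      /* accumulate |a_ij| into an array of global column sums, Allreduce it, and take the
         maximum; work and memory scale with the global number of columns */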
1971       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1972       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1973       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1974       *norm = 0.0;
1975       v = amat->a; jj = amat->j;
1976       for (j=0; j<amat->nz; j++) {
1977         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1978       }
1979       v = bmat->a; jj = bmat->j;
1980       for (j=0; j<bmat->nz; j++) {
1981         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1982       }
1983       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1984       for (j=0; j<mat->cmap->N; j++) {
1985         if (tmp2[j] > *norm) *norm = tmp2[j];
1986       }
1987       ierr = PetscFree(tmp);CHKERRQ(ierr);
1988       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1989     } else if (type == NORM_INFINITY) { /* max row norm */
1990       PetscReal ntemp = 0.0;
1991       for (j=0; j<aij->A->rmap->n; j++) {
1992         v = amat->a + amat->i[j];
1993         sum = 0.0;
1994         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1995           sum += PetscAbsScalar(*v); v++;
1996         }
1997         v = bmat->a + bmat->i[j];
1998         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1999           sum += PetscAbsScalar(*v); v++;
2000         }
2001         if (sum > ntemp) ntemp = sum;
2002       }
2003       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2004     } else {
2005       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2006     }
2007   }
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatTranspose_MPIAIJ"
2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2014 {
2015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2016   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2017   PetscErrorCode ierr;
2018   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2019   PetscInt       cstart=A->cmap->rstart,ncol;
2020   Mat            B;
2021   MatScalar      *array;
2022 
2023   PetscFunctionBegin;
2024   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2025 
2026   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2027   ai = Aloc->i; aj = Aloc->j;
2028   bi = Bloc->i; bj = Bloc->j;
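  /* each locally stored entry (i,j) of A and B is inserted into the result as (j,i) using
     global indices with MatSetValues(); entries whose target row lives on another process
     are routed through the matrix stash during assembly */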
2029   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2030     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2031     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2032     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2033     for (i=0; i<ai[ma]; i++){
2034       d_nnz[aj[i]] ++;
2035       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2036     }
2037 
2038     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2039     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2040     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2041     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2042     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2044     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2045   } else {
2046     B = *matout;
2047     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2048     for (i=0; i<ai[ma]; i++){
2049       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2050     }
2051   }
2052 
2053   /* copy over the A part */
2054   array = Aloc->a;
2055   row = A->rmap->rstart;
2056   for (i=0; i<ma; i++) {
2057     ncol = ai[i+1]-ai[i];
2058     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2059     row++; array += ncol; aj += ncol;
2060   }
2061   aj = Aloc->j;
2062   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2063 
2064   /* copy over the B part */
2065   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2066   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2067   array = Bloc->a;
2068   row = A->rmap->rstart;
2069   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2070   cols_tmp = cols;
2071   for (i=0; i<mb; i++) {
2072     ncol = bi[i+1]-bi[i];
2073     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2074     row++; array += ncol; cols_tmp += ncol;
2075   }
2076   ierr = PetscFree(cols);CHKERRQ(ierr);
2077 
2078   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2081     *matout = B;
2082   } else {
2083     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2091 {
2092   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2093   Mat            a = aij->A,b = aij->B;
2094   PetscErrorCode ierr;
2095   PetscInt       s1,s2,s3;
2096 
2097   PetscFunctionBegin;
2098   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2099   if (rr) {
2100     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2101     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2102     /* Overlap communication with computation. */
2103     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104   }
2105   if (ll) {
2106     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2107     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2108     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2109   }
2110   /* scale  the diagonal block */
2111   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2112 
2113   if (rr) {
2114     /* Do a scatter end and then right scale the off-diagonal block */
2115     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2116     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2117   }
2118 
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2124 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2125 {
2126   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatEqual_MPIAIJ"
2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2137 {
2138   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2139   Mat            a,b,c,d;
2140   PetscBool      flg;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   a = matA->A; b = matA->B;
2145   c = matB->A; d = matB->B;
2146 
2147   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2148   if (flg) {
2149     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2150   }
2151   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatCopy_MPIAIJ"
2157 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2158 {
2159   PetscErrorCode ierr;
2160   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2161   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2162 
2163   PetscFunctionBegin;
2164   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2165   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2166     /* because of the column compression in the off-processor part of the matrix a->B,
2167        the number of columns in a->B and b->B may be different, hence we cannot call
2168        the MatCopy() directly on the two parts. If need be, we can provide a more
2169        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2170        then copying the submatrices */
2171     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2172   } else {
2173     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2174     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatSetUp_MPIAIJ"
2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2182 {
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2192 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2193 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2194 {
2195   PetscInt          i,m=Y->rmap->N;
2196   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2197   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2198   const PetscInt    *xi = x->i,*yi = y->i;
2199 
2200   PetscFunctionBegin;
2201   /* Set the number of nonzeros in the new matrix */
2202   for (i=0; i<m; i++) {
2203     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2204     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2205     nnz[i] = 0;
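    /* count the size of the union of the two sorted column lists of row i, comparing in
       global numbering through the local-to-global maps xltog and yltog */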
2206     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2207       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2208       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2209       nnz[i]++;
2210     }
2211     for (; k<nzy; k++) nnz[i]++;
2212   }
2213   PetscFunctionReturn(0);
2214 }
2215 
2216 #undef __FUNCT__
2217 #define __FUNCT__ "MatAXPY_MPIAIJ"
2218 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2219 {
2220   PetscErrorCode ierr;
2221   PetscInt       i;
2222   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2223   PetscBLASInt   bnz,one=1;
2224   Mat_SeqAIJ     *x,*y;
2225 
2226   PetscFunctionBegin;
2227   if (str == SAME_NONZERO_PATTERN) {
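    /* identical nonzero patterns: add the stored values of the diagonal and off-diagonal
       blocks directly, one BLAS axpy call per block */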
2228     PetscScalar alpha = a;
2229     x = (Mat_SeqAIJ *)xx->A->data;
2230     y = (Mat_SeqAIJ *)yy->A->data;
2231     bnz = PetscBLASIntCast(x->nz);
2232     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2233     x = (Mat_SeqAIJ *)xx->B->data;
2234     y = (Mat_SeqAIJ *)yy->B->data;
2235     bnz = PetscBLASIntCast(x->nz);
2236     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2237   } else if (str == SUBSET_NONZERO_PATTERN) {
2238     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2239 
2240     x = (Mat_SeqAIJ *)xx->B->data;
2241     y = (Mat_SeqAIJ *)yy->B->data;
2242     if (y->xtoy && y->XtoY != xx->B) {
2243       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2244       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2245     }
2246     if (!y->xtoy) { /* get xtoy */
2247       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2248       y->XtoY = xx->B;
2249       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2250     }
2251     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2252   } else {
2253     Mat B;
2254     PetscInt *nnz_d,*nnz_o;
2255     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2256     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2257     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2258     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2259     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2260     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2261     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2262     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2263     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2264     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2265     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2266     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2267     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2268     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2269   }
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2274 
2275 #undef __FUNCT__
2276 #define __FUNCT__ "MatConjugate_MPIAIJ"
2277 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2278 {
2279 #if defined(PETSC_USE_COMPLEX)
2280   PetscErrorCode ierr;
2281   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2285   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2286 #else
2287   PetscFunctionBegin;
2288 #endif
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatRealPart_MPIAIJ"
2294 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2295 {
2296   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2301   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2307 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2308 {
2309   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2314   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 #ifdef PETSC_HAVE_PBGL
2319 
2320 #include <boost/parallel/mpi/bsp_process_group.hpp>
2321 #include <boost/graph/distributed/ilu_default_graph.hpp>
2322 #include <boost/graph/distributed/ilu_0_block.hpp>
2323 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2324 #include <boost/graph/distributed/petsc/interface.hpp>
2325 #include <boost/multi_array.hpp>
2326 #include <boost/parallel/distributed_property_map.hpp>
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2330 /*
2331   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2332 */
2333 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2334 {
2335   namespace petsc = boost::distributed::petsc;
2336 
2337   namespace graph_dist = boost::graph::distributed;
2338   using boost::graph::distributed::ilu_default::process_group_type;
2339   using boost::graph::ilu_permuted;
2340 
2341   PetscBool       row_identity, col_identity;
2342   PetscContainer  c;
2343   PetscInt        m, n, M, N;
2344   PetscErrorCode  ierr;
2345 
2346   PetscFunctionBegin;
2347   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2348   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2349   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2350   if (!row_identity || !col_identity) {
2351     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2352   }
2353 
2354   process_group_type pg;
2355   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2356   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2357   lgraph_type&   level_graph = *lgraph_p;
2358   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2359 
2360   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2361   ilu_permuted(level_graph);
2362 
2363   /* put together the new matrix */
2364   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2365   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2366   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2367   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2368   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2369   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2370   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372 
2373   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2374   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2375   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2376   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2382 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2383 {
2384   PetscFunctionBegin;
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatSolve_MPIAIJ"
2390 /*
2391   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2392 */
2393 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2394 {
2395   namespace graph_dist = boost::graph::distributed;
2396 
2397   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2398   lgraph_type*   lgraph_p;
2399   PetscContainer c;
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2404   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2405   ierr = VecCopy(b, x);CHKERRQ(ierr);
2406 
2407   PetscScalar* array_x;
2408   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2409   PetscInt sx;
2410   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2411 
2412   PetscScalar* array_b;
2413   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2414   PetscInt sb;
2415   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2416 
2417   lgraph_type&   level_graph = *lgraph_p;
2418   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2419 
2420   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2421   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2422                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2423 
2424   typedef boost::iterator_property_map<array_ref_type::iterator,
2425                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2426   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2427                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2428 
2429   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2430 
2431   PetscFunctionReturn(0);
2432 }
2433 #endif
2434 
2435 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2436   PetscInt       nzlocal,nsends,nrecvs;
2437   PetscMPIInt    *send_rank,*recv_rank;
2438   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2439   PetscScalar    *sbuf_a,**rbuf_a;
2440   PetscErrorCode (*Destroy)(Mat);
2441 } Mat_Redundant;
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2445 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2446 {
2447   PetscErrorCode       ierr;
2448   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2449   PetscInt             i;
2450 
2451   PetscFunctionBegin;
2452   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2453   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2454   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2455   for (i=0; i<redund->nrecvs; i++){
2456     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2457     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2458   }
2459   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2460   ierr = PetscFree(redund);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatDestroy_MatRedundant"
2466 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2467 {
2468   PetscErrorCode  ierr;
2469   PetscContainer  container;
2470   Mat_Redundant   *redund=PETSC_NULL;
2471 
2472   PetscFunctionBegin;
2473   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2474   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2475   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2476   A->ops->destroy = redund->Destroy;
2477   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2478   if (A->ops->destroy) {
2479     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2480   }
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2486 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2487 {
2488   PetscMPIInt    rank,size;
2489   MPI_Comm       comm=((PetscObject)mat)->comm;
2490   PetscErrorCode ierr;
2491   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2492   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2493   PetscInt       *rowrange=mat->rmap->range;
2494   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2495   Mat            A=aij->A,B=aij->B,C=*matredundant;
2496   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2497   PetscScalar    *sbuf_a;
2498   PetscInt       nzlocal=a->nz+b->nz;
2499   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2500   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2501   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2502   MatScalar      *aworkA,*aworkB;
2503   PetscScalar    *vals;
2504   PetscMPIInt    tag1,tag2,tag3,imdex;
2505   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2506                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2507   MPI_Status     recv_status,*send_status;
2508   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2509   PetscInt       **rbuf_j=PETSC_NULL;
2510   PetscScalar    **rbuf_a=PETSC_NULL;
2511   Mat_Redundant  *redund=PETSC_NULL;
2512   PetscContainer container;
2513 
2514   PetscFunctionBegin;
2515   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2516   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2517 
2518   if (reuse == MAT_REUSE_MATRIX) {
2519     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2520     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2521     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2522     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2523     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2524     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2525     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2526     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2527 
2528     nsends    = redund->nsends;
2529     nrecvs    = redund->nrecvs;
2530     send_rank = redund->send_rank;
2531     recv_rank = redund->recv_rank;
2532     sbuf_nz   = redund->sbuf_nz;
2533     rbuf_nz   = redund->rbuf_nz;
2534     sbuf_j    = redund->sbuf_j;
2535     sbuf_a    = redund->sbuf_a;
2536     rbuf_j    = redund->rbuf_j;
2537     rbuf_a    = redund->rbuf_a;
2538   }
2539 
2540   if (reuse == MAT_INITIAL_MATRIX){
2541     PetscMPIInt  subrank,subsize;
2542     PetscInt     nleftover,np_subcomm;
2543     /* get the destination processors' id send_rank, nsends and nrecvs */
2544     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2545     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2546     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2547     np_subcomm = size/nsubcomm;
2548     nleftover  = size - nsubcomm*np_subcomm;
2549     nsends = 0; nrecvs = 0;
2550     for (i=0; i<size; i++){ /* i=rank*/
2551       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2552         send_rank[nsends] = i; nsends++;
2553         recv_rank[nrecvs++] = i;
2554       }
2555     }
2556     if (rank >= size - nleftover){/* this proc is a leftover processor */
2557       i = size-nleftover-1;
2558       j = 0;
2559       while (j < nsubcomm - nleftover){
2560         send_rank[nsends++] = i;
2561         i--; j++;
2562       }
2563     }
2564 
2565     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2566       for (i=0; i<nleftover; i++){
2567         recv_rank[nrecvs++] = size-nleftover+i;
2568       }
2569     }
2570 
2571     /* allocate sbuf_j, sbuf_a */
2572     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2573     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2574     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2575   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2576 
2577   /* copy mat's local entries into the buffers */
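  /* layout of sbuf_j: the row pointer array (rend-rstart+1 entries) followed by the global
     column indices of all local rows; sbuf_a holds the matching numerical values */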
2578   if (reuse == MAT_INITIAL_MATRIX){
2579     rownz_max = 0;
2580     rptr = sbuf_j;
2581     cols = sbuf_j + rend-rstart + 1;
2582     vals = sbuf_a;
2583     rptr[0] = 0;
2584     for (i=0; i<rend-rstart; i++){
2585       row = i + rstart;
2586       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2587       ncols  = nzA + nzB;
2588       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2589       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2590       /* load the column indices for this row into cols */
2591       lwrite = 0;
2592       for (l=0; l<nzB; l++) {
2593         if ((ctmp = bmap[cworkB[l]]) < cstart){
2594           vals[lwrite]   = aworkB[l];
2595           cols[lwrite++] = ctmp;
2596         }
2597       }
2598       for (l=0; l<nzA; l++){
2599         vals[lwrite]   = aworkA[l];
2600         cols[lwrite++] = cstart + cworkA[l];
2601       }
2602       for (l=0; l<nzB; l++) {
2603         if ((ctmp = bmap[cworkB[l]]) >= cend){
2604           vals[lwrite]   = aworkB[l];
2605           cols[lwrite++] = ctmp;
2606         }
2607       }
2608       vals += ncols;
2609       cols += ncols;
2610       rptr[i+1] = rptr[i] + ncols;
2611       if (rownz_max < ncols) rownz_max = ncols;
2612     }
2613     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2614   } else { /* only copy matrix values into sbuf_a */
2615     rptr = sbuf_j;
2616     vals = sbuf_a;
2617     rptr[0] = 0;
2618     for (i=0; i<rend-rstart; i++){
2619       row = i + rstart;
2620       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2621       ncols  = nzA + nzB;
2622       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2623       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2624       lwrite = 0;
2625       for (l=0; l<nzB; l++) {
2626         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2627       }
2628       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2629       for (l=0; l<nzB; l++) {
2630         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2631       }
2632       vals += ncols;
2633       rptr[i+1] = rptr[i] + ncols;
2634     }
2635   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2636 
2637   /* send nzlocal to others, and recv other's nzlocal */
2638   /*--------------------------------------------------*/
2639   if (reuse == MAT_INITIAL_MATRIX){
2640     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2641     s_waits2 = s_waits3 + nsends;
2642     s_waits1 = s_waits2 + nsends;
2643     r_waits1 = s_waits1 + nsends;
2644     r_waits2 = r_waits1 + nrecvs;
2645     r_waits3 = r_waits2 + nrecvs;
2646   } else {
2647     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2648     r_waits3 = s_waits3 + nsends;
2649   }
2650 
2651   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2652   if (reuse == MAT_INITIAL_MATRIX){
2653     /* get new tags to keep the communication clean */
2654     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2655     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2656     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2657 
2658     /* post receives of other's nzlocal */
2659     for (i=0; i<nrecvs; i++){
2660       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2661     }
2662     /* send nzlocal to others */
2663     for (i=0; i<nsends; i++){
2664       sbuf_nz[i] = nzlocal;
2665       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2666     }
2667     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2668     count = nrecvs;
2669     while (count) {
2670       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2671       recv_rank[imdex] = recv_status.MPI_SOURCE;
2672       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2673       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2674 
2675       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2676       rbuf_nz[imdex] += i + 2;
2677       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2678       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2679       count--;
2680     }
2681     /* wait on sends of nzlocal */
2682     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2683     /* send mat->i,j to others, and recv from other's */
2684     /*------------------------------------------------*/
2685     for (i=0; i<nsends; i++){
2686       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2687       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2688     }
2689     /* wait on receives of mat->i,j */
2690     /*------------------------------*/
2691     count = nrecvs;
2692     while (count) {
2693       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2694       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2695       count--;
2696     }
2697     /* wait on sends of mat->i,j */
2698     /*---------------------------*/
2699     if (nsends) {
2700       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2701     }
2702   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2703 
2704   /* post receives, send and receive mat->a */
2705   /*----------------------------------------*/
2706   for (imdex=0; imdex<nrecvs; imdex++) {
2707     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2708   }
2709   for (i=0; i<nsends; i++){
2710     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2711   }
2712   count = nrecvs;
2713   while (count) {
2714     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2715     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2716     count--;
2717   }
2718   if (nsends) {
2719     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2720   }
2721 
2722   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2723 
2724   /* create redundant matrix */
2725   /*-------------------------*/
2726   if (reuse == MAT_INITIAL_MATRIX){
2727     /* compute rownz_max for preallocation */
2728     for (imdex=0; imdex<nrecvs; imdex++){
2729       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2730       rptr = rbuf_j[imdex];
2731       for (i=0; i<j; i++){
2732         ncols = rptr[i+1] - rptr[i];
2733         if (rownz_max < ncols) rownz_max = ncols;
2734       }
2735     }
2736 
2737     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2738     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2739     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2740     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2741     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2742     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2743   } else {
2744     C = *matredundant;
2745   }
2746 
2747   /* insert local matrix entries */
2748   rptr = sbuf_j;
2749   cols = sbuf_j + rend-rstart + 1;
2750   vals = sbuf_a;
2751   for (i=0; i<rend-rstart; i++){
2752     row   = i + rstart;
2753     ncols = rptr[i+1] - rptr[i];
2754     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2755     vals += ncols;
2756     cols += ncols;
2757   }
2758   /* insert received matrix entries */
2759   for (imdex=0; imdex<nrecvs; imdex++){
2760     rstart = rowrange[recv_rank[imdex]];
2761     rend   = rowrange[recv_rank[imdex]+1];
2762     rptr = rbuf_j[imdex];
2763     cols = rbuf_j[imdex] + rend-rstart + 1;
2764     vals = rbuf_a[imdex];
2765     for (i=0; i<rend-rstart; i++){
2766       row   = i + rstart;
2767       ncols = rptr[i+1] - rptr[i];
2768       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2769       vals += ncols;
2770       cols += ncols;
2771     }
2772   }
2773   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2774   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2775   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2776   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d,%d != input mat size %d,%d",M,N,mat->rmap->N,mat->cmap->N);
2777   if (reuse == MAT_INITIAL_MATRIX) {
2778     PetscContainer container;
2779     *matredundant = C;
2780     /* create a supporting struct and attach it to C for reuse */
2781     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2782     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2783     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2784     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2785     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2786     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2787 
2788     redund->nzlocal = nzlocal;
2789     redund->nsends  = nsends;
2790     redund->nrecvs  = nrecvs;
2791     redund->send_rank = send_rank;
2792     redund->recv_rank = recv_rank;
2793     redund->sbuf_nz = sbuf_nz;
2794     redund->rbuf_nz = rbuf_nz;
2795     redund->sbuf_j  = sbuf_j;
2796     redund->sbuf_a  = sbuf_a;
2797     redund->rbuf_j  = rbuf_j;
2798     redund->rbuf_a  = rbuf_a;
2799 
2800     redund->Destroy = C->ops->destroy;
2801     C->ops->destroy = MatDestroy_MatRedundant;
2802   }
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2808 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2809 {
2810   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2811   PetscErrorCode ierr;
2812   PetscInt       i,*idxb = 0;
2813   PetscScalar    *va,*vb;
2814   Vec            vtmp;
2815 
2816   PetscFunctionBegin;
2817   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2818   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2819   if (idx) {
2820     for (i=0; i<A->rmap->n; i++) {
2821       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2822     }
2823   }
2824 
2825   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2826   if (idx) {
2827     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2828   }
2829   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2830   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2831 
2832   for (i=0; i<A->rmap->n; i++){
2833     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2834       va[i] = vb[i];
2835       if (idx) idx[i] = a->garray[idxb[i]];
2836     }
2837   }
2838 
2839   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2840   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2841   ierr = PetscFree(idxb);CHKERRQ(ierr);
2842   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2848 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2849 {
2850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2851   PetscErrorCode ierr;
2852   PetscInt       i,*idxb = 0;
2853   PetscScalar    *va,*vb;
2854   Vec            vtmp;
2855 
2856   PetscFunctionBegin;
2857   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2858   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2859   if (idx) {
2860     for (i=0; i<A->rmap->n; i++) {
2861       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2862     }
2863   }
2864 
2865   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2866   if (idx) {
2867     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2868   }
2869   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2870   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2871 
2872   for (i=0; i<A->rmap->n; i++){
2873     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2874       va[i] = vb[i];
2875       if (idx) idx[i] = a->garray[idxb[i]];
2876     }
2877   }
2878 
2879   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2880   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2881   ierr = PetscFree(idxb);CHKERRQ(ierr);
2882   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 #undef __FUNCT__
2887 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2888 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2889 {
2890   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2891   PetscInt       n      = A->rmap->n;
2892   PetscInt       cstart = A->cmap->rstart;
2893   PetscInt      *cmap   = mat->garray;
2894   PetscInt      *diagIdx, *offdiagIdx;
2895   Vec            diagV, offdiagV;
2896   PetscScalar   *a, *diagA, *offdiagA;
2897   PetscInt       r;
2898   PetscErrorCode ierr;
2899 
2900   PetscFunctionBegin;
2901   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2902   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2903   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2904   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2905   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2906   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2907   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2908   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2909   for(r = 0; r < n; ++r) {
2910     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2911       a[r]   = diagA[r];
2912       idx[r] = cstart + diagIdx[r];
2913     } else {
2914       a[r]   = offdiagA[r];
2915       idx[r] = cmap[offdiagIdx[r]];
2916     }
2917   }
2918   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2919   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2920   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2921   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2922   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2923   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2929 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2930 {
2931   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2932   PetscInt       n      = A->rmap->n;
2933   PetscInt       cstart = A->cmap->rstart;
2934   PetscInt      *cmap   = mat->garray;
2935   PetscInt      *diagIdx, *offdiagIdx;
2936   Vec            diagV, offdiagV;
2937   PetscScalar   *a, *diagA, *offdiagA;
2938   PetscInt       r;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2943   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2944   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2945   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2946   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2947   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2948   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2949   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2950   for(r = 0; r < n; ++r) {
2951     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2952       a[r]   = diagA[r];
2953       idx[r] = cstart + diagIdx[r];
2954     } else {
2955       a[r]   = offdiagA[r];
2956       idx[r] = cmap[offdiagIdx[r]];
2957     }
2958   }
2959   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2960   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2961   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2962   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2963   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2964   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2970 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2971 {
2972   PetscErrorCode ierr;
2973   Mat            *dummy;
2974 
2975   PetscFunctionBegin;
2976   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2977   *newmat = *dummy;
2978   ierr = PetscFree(dummy);CHKERRQ(ierr);
2979   PetscFunctionReturn(0);
2980 }
2981 
2982 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2983 
2984 #undef __FUNCT__
2985 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2986 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2987 {
2988   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2989   PetscErrorCode ierr;
2990 
2991   PetscFunctionBegin;
2992   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2993   PetscFunctionReturn(0);
2994 }
2995 
2996 
2997 /* -------------------------------------------------------------------*/
2998 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2999        MatGetRow_MPIAIJ,
3000        MatRestoreRow_MPIAIJ,
3001        MatMult_MPIAIJ,
3002 /* 4*/ MatMultAdd_MPIAIJ,
3003        MatMultTranspose_MPIAIJ,
3004        MatMultTransposeAdd_MPIAIJ,
3005 #ifdef PETSC_HAVE_PBGL
3006        MatSolve_MPIAIJ,
3007 #else
3008        0,
3009 #endif
3010        0,
3011        0,
3012 /*10*/ 0,
3013        0,
3014        0,
3015        MatSOR_MPIAIJ,
3016        MatTranspose_MPIAIJ,
3017 /*15*/ MatGetInfo_MPIAIJ,
3018        MatEqual_MPIAIJ,
3019        MatGetDiagonal_MPIAIJ,
3020        MatDiagonalScale_MPIAIJ,
3021        MatNorm_MPIAIJ,
3022 /*20*/ MatAssemblyBegin_MPIAIJ,
3023        MatAssemblyEnd_MPIAIJ,
3024        MatSetOption_MPIAIJ,
3025        MatZeroEntries_MPIAIJ,
3026 /*24*/ MatZeroRows_MPIAIJ,
3027        0,
3028 #ifdef PETSC_HAVE_PBGL
3029        0,
3030 #else
3031        0,
3032 #endif
3033        0,
3034        0,
3035 /*29*/ MatSetUp_MPIAIJ,
3036 #ifdef PETSC_HAVE_PBGL
3037        0,
3038 #else
3039        0,
3040 #endif
3041        0,
3042        0,
3043        0,
3044 /*34*/ MatDuplicate_MPIAIJ,
3045        0,
3046        0,
3047        0,
3048        0,
3049 /*39*/ MatAXPY_MPIAIJ,
3050        MatGetSubMatrices_MPIAIJ,
3051        MatIncreaseOverlap_MPIAIJ,
3052        MatGetValues_MPIAIJ,
3053        MatCopy_MPIAIJ,
3054 /*44*/ MatGetRowMax_MPIAIJ,
3055        MatScale_MPIAIJ,
3056        0,
3057        0,
3058        MatZeroRowsColumns_MPIAIJ,
3059 /*49*/ 0,
3060        0,
3061        0,
3062        0,
3063        0,
3064 /*54*/ MatFDColoringCreate_MPIAIJ,
3065        0,
3066        MatSetUnfactored_MPIAIJ,
3067        0, /* MatPermute_MPIAIJ, impl currently broken */
3068        0,
3069 /*59*/ MatGetSubMatrix_MPIAIJ,
3070        MatDestroy_MPIAIJ,
3071        MatView_MPIAIJ,
3072        0,
3073        0,
3074 /*64*/ 0,
3075        0,
3076        0,
3077        0,
3078        0,
3079 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3080        MatGetRowMinAbs_MPIAIJ,
3081        0,
3082        MatSetColoring_MPIAIJ,
3083 #if defined(PETSC_HAVE_ADIC)
3084        MatSetValuesAdic_MPIAIJ,
3085 #else
3086        0,
3087 #endif
3088        MatSetValuesAdifor_MPIAIJ,
3089 /*75*/ MatFDColoringApply_AIJ,
3090        0,
3091        0,
3092        0,
3093        0,
3094 /*80*/ 0,
3095        0,
3096        0,
3097 /*83*/ MatLoad_MPIAIJ,
3098        0,
3099        0,
3100        0,
3101        0,
3102        0,
3103 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3104        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3105        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3106        MatPtAP_Basic,
3107        MatPtAPSymbolic_MPIAIJ,
3108 /*94*/ MatPtAPNumeric_MPIAIJ,
3109        0,
3110        0,
3111        0,
3112        0,
3113 /*99*/ 0,
3114        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3115        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3116        MatConjugate_MPIAIJ,
3117        0,
3118 /*104*/MatSetValuesRow_MPIAIJ,
3119        MatRealPart_MPIAIJ,
3120        MatImaginaryPart_MPIAIJ,
3121        0,
3122        0,
3123 /*109*/0,
3124        MatGetRedundantMatrix_MPIAIJ,
3125        MatGetRowMin_MPIAIJ,
3126        0,
3127        0,
3128 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3129        0,
3130        0,
3131        0,
3132        0,
3133 /*119*/0,
3134        0,
3135        0,
3136        0,
3137        MatGetMultiProcBlock_MPIAIJ,
3138 /*124*/MatFindNonZeroRows_MPIAIJ,
3139        MatGetColumnNorms_MPIAIJ,
3140        MatInvertBlockDiagonal_MPIAIJ,
3141        0,
3142        MatGetSubMatricesParallel_MPIAIJ,
3143 /*129*/0,
3144        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3145        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3146        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3147        0,
3148 /*134*/0,
3149        0,
3150        0,
3151        0,
3152        0
3153 };
3154 
3155 /* ----------------------------------------------------------------------------------------*/
3156 
3157 EXTERN_C_BEGIN
3158 #undef __FUNCT__
3159 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3160 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3161 {
3162   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3163   PetscErrorCode ierr;
3164 
3165   PetscFunctionBegin;
3166   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3167   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3168   PetscFunctionReturn(0);
3169 }
3170 EXTERN_C_END
3171 
3172 EXTERN_C_BEGIN
3173 #undef __FUNCT__
3174 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3175 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3176 {
3177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3182   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3183   PetscFunctionReturn(0);
3184 }
3185 EXTERN_C_END
3186 
3187 EXTERN_C_BEGIN
3188 #undef __FUNCT__
3189 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3190 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3191 {
3192   Mat_MPIAIJ     *b;
3193   PetscErrorCode ierr;
3194   PetscInt       i;
3195   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3196 
3197   PetscFunctionBegin;
3198   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3199   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3200   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3201   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3202   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3203   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3204 
3205   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3206   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3207   if (d_nnz) {
3208     for (i=0; i<B->rmap->n; i++) {
3209       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3210     }
3211   }
3212   if (o_nnz) {
3213     for (i=0; i<B->rmap->n; i++) {
3214       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3215     }
3216   }
3217   b = (Mat_MPIAIJ*)B->data;
3218 
3219   if (!B->preallocated) {
3220     /* Explicitly create 2 MATSEQAIJ matrices. */
3221     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3222     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3223     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3224     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3225     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3226     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3227     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3228     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3229     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3230     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3231   }
3232 
3233   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3234   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3235   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3236   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3237   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3238   B->preallocated = PETSC_TRUE;
3239   PetscFunctionReturn(0);
3240 }
3241 EXTERN_C_END
3242 
3243 #undef __FUNCT__
3244 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3245 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3246 {
3247   Mat            mat;
3248   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3249   PetscErrorCode ierr;
3250 
3251   PetscFunctionBegin;
3252   *newmat       = 0;
3253   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3254   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3255   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3256   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3257   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3258   a    = (Mat_MPIAIJ*)mat->data;
3259 
3260   mat->factortype    = matin->factortype;
3261   mat->rmap->bs      = matin->rmap->bs;
3262   mat->cmap->bs      = matin->cmap->bs;
3263   mat->assembled    = PETSC_TRUE;
3264   mat->insertmode   = NOT_SET_VALUES;
3265   mat->preallocated = PETSC_TRUE;
3266 
3267   a->size           = oldmat->size;
3268   a->rank           = oldmat->rank;
3269   a->donotstash     = oldmat->donotstash;
3270   a->roworiented    = oldmat->roworiented;
3271   a->rowindices     = 0;
3272   a->rowvalues      = 0;
3273   a->getrowactive   = PETSC_FALSE;
3274 
3275   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3276   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3277 
3278   if (oldmat->colmap) {
3279 #if defined (PETSC_USE_CTABLE)
3280     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3281 #else
3282     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3283     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3284     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3285 #endif
3286   } else a->colmap = 0;
3287   if (oldmat->garray) {
3288     PetscInt len;
3289     len  = oldmat->B->cmap->n;
3290     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3291     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3292     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3293   } else a->garray = 0;
3294 
3295   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3296   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3297   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3298   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3299   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3300   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3301   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3302   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3303   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3304   *newmat = mat;
3305   PetscFunctionReturn(0);
3306 }
3307 
3308 
3309 
3310 #undef __FUNCT__
3311 #define __FUNCT__ "MatLoad_MPIAIJ"
3312 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3313 {
3314   PetscScalar    *vals,*svals;
3315   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3316   PetscErrorCode ierr;
3317   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3318   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3319   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3320   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3321   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3322   int            fd;
3323   PetscInt       bs = 1;
3324 
3325   PetscFunctionBegin;
3326   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3327   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3328   if (!rank) {
3329     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3330     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3331     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3332   }
3333 
3334   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3335   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3336   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3337 
3338   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3339 
3340   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3341   M = header[1]; N = header[2];
3342   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3343   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3344   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3345 
3346   /* If global sizes are set, check if they are consistent with that given in the file */
3347   if (sizesset) {
3348     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3349   }
3350   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3351   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3352 
3353   /* determine ownership of all (block) rows */
3354   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3355   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3356   else m = newMat->rmap->n; /* Set by user */
3357 
3358   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3359   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3360 
3361   /* First process needs enough room for process with most rows */
3362   if (!rank) {
3363     mmax       = rowners[1];
3364     for (i=2; i<size; i++) {
3365       mmax = PetscMax(mmax,rowners[i]);
3366     }
3367   } else mmax = m;
3368 
3369   rowners[0] = 0;
3370   for (i=2; i<=size; i++) {
3371     rowners[i] += rowners[i-1];
3372   }
3373   rstart = rowners[rank];
3374   rend   = rowners[rank+1];
3375 
3376   /* distribute row lengths to all processors */
3377   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3378   if (!rank) {
3379     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3380     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3381     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3382     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3383     for (j=0; j<m; j++) {
3384       procsnz[0] += ourlens[j];
3385     }
3386     for (i=1; i<size; i++) {
3387       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3388       /* calculate the number of nonzeros on each processor */
3389       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3390         procsnz[i] += rowlengths[j];
3391       }
3392       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3393     }
3394     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3395   } else {
3396     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3397   }
3398 
3399   if (!rank) {
3400     /* determine max buffer needed and allocate it */
3401     maxnz = 0;
3402     for (i=0; i<size; i++) {
3403       maxnz = PetscMax(maxnz,procsnz[i]);
3404     }
3405     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3406 
3407     /* read in my part of the matrix column indices  */
3408     nz   = procsnz[0];
3409     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3410     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3411 
3412     /* read in everyone else's and ship off */
3413     for (i=1; i<size; i++) {
3414       nz     = procsnz[i];
3415       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3416       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3417     }
3418     ierr = PetscFree(cols);CHKERRQ(ierr);
3419   } else {
3420     /* determine buffer space needed for message */
3421     nz = 0;
3422     for (i=0; i<m; i++) {
3423       nz += ourlens[i];
3424     }
3425     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3426 
3427     /* receive message of column indices*/
3428     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3429   }
3430 
3431   /* determine column ownership if matrix is not square */
3432   if (N != M) {
3433     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3434     else n = newMat->cmap->n;
3435     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3436     cstart = cend - n;
3437   } else {
3438     cstart = rstart;
3439     cend   = rend;
3440     n      = cend - cstart;
3441   }
3442 
3443   /* loop over local rows, determining number of off diagonal entries */
3444   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3445   jj = 0;
3446   for (i=0; i<m; i++) {
3447     for (j=0; j<ourlens[i]; j++) {
3448       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3449       jj++;
3450     }
3451   }
3452 
3453   for (i=0; i<m; i++) {
3454     ourlens[i] -= offlens[i];
3455   }
3456   if (!sizesset) {
3457     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3458   }
3459 
3460   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3461 
3462   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3463 
3464   for (i=0; i<m; i++) {
3465     ourlens[i] += offlens[i];
3466   }
3467 
3468   if (!rank) {
3469     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3470 
3471     /* read in my part of the matrix numerical values  */
3472     nz   = procsnz[0];
3473     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3474 
3475     /* insert into matrix */
3476     jj      = rstart;
3477     smycols = mycols;
3478     svals   = vals;
3479     for (i=0; i<m; i++) {
3480       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3481       smycols += ourlens[i];
3482       svals   += ourlens[i];
3483       jj++;
3484     }
3485 
3486     /* read in other processors and ship out */
3487     for (i=1; i<size; i++) {
3488       nz     = procsnz[i];
3489       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3490       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3491     }
3492     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3493   } else {
3494     /* receive numeric values */
3495     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3496 
3497     /* receive message of values*/
3498     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3499 
3500     /* insert into matrix */
3501     jj      = rstart;
3502     smycols = mycols;
3503     svals   = vals;
3504     for (i=0; i<m; i++) {
3505       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3506       smycols += ourlens[i];
3507       svals   += ourlens[i];
3508       jj++;
3509     }
3510   }
3511   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3512   ierr = PetscFree(vals);CHKERRQ(ierr);
3513   ierr = PetscFree(mycols);CHKERRQ(ierr);
3514   ierr = PetscFree(rowners);CHKERRQ(ierr);
3515   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3516   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3517 
3518   PetscFunctionReturn(0);
3519 }
3520 
3521 #undef __FUNCT__
3522 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3523 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3524 {
3525   PetscErrorCode ierr;
3526   IS             iscol_local;
3527   PetscInt       csize;
3528 
3529   PetscFunctionBegin;
3530   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3531   if (call == MAT_REUSE_MATRIX) {
3532     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3533     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3534   } else {
3535     PetscInt cbs;
3536     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3537     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3538     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3539   }
3540   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3541   if (call == MAT_INITIAL_MATRIX) {
3542     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3543     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3544   }
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3549 #undef __FUNCT__
3550 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3551 /*
3552     Not great since it makes two copies of the submatrix: first each process extracts
3553   a local SeqAIJ submatrix, and these local pieces are then concatenated into the final
3554   parallel result. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3555 
3556   Note: This requires a sequential iscol with all indices.
3557 */
3558 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3559 {
3560   PetscErrorCode ierr;
3561   PetscMPIInt    rank,size;
3562   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3563   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3564   PetscBool      allcolumns, colflag;
3565   Mat            M,Mreuse;
3566   MatScalar      *vwork,*aa;
3567   MPI_Comm       comm = ((PetscObject)mat)->comm;
3568   Mat_SeqAIJ     *aij;
3569 
3570 
3571   PetscFunctionBegin;
3572   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3574 
3575   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3576   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3577   if (colflag && ncol == mat->cmap->N){
3578     allcolumns = PETSC_TRUE;
3579   } else {
3580     allcolumns = PETSC_FALSE;
3581   }
3582   if (call ==  MAT_REUSE_MATRIX) {
3583     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3584     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3585     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3586   } else {
3587     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3588   }
3589 
3590   /*
3591       m - number of local rows
3592       n - number of columns (same on all processors)
3593       rstart - first row in new global matrix generated
3594   */
3595   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3596   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3597   if (call == MAT_INITIAL_MATRIX) {
3598     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3599     ii  = aij->i;
3600     jj  = aij->j;
3601 
3602     /*
3603         Determine the number of non-zeros in the diagonal and off-diagonal
3604         portions of the matrix in order to do correct preallocation
3605     */
3606 
3607     /* first get start and end of "diagonal" columns */
3608     if (csize == PETSC_DECIDE) {
3609       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3610       if (mglobal == n) { /* square matrix */
3611         nlocal = m;
3612       } else {
3613         nlocal = n/size + ((n % size) > rank);
3614       }
3615     } else {
3616       nlocal = csize;
3617     }
3618     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3619     rstart = rend - nlocal;
3620     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3621 
3622     /* next, compute all the lengths */
3623     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3624     olens = dlens + m;
3625     for (i=0; i<m; i++) {
3626       jend = ii[i+1] - ii[i];
3627       olen = 0;
3628       dlen = 0;
3629       for (j=0; j<jend; j++) {
3630         if (*jj < rstart || *jj >= rend) olen++;
3631         else dlen++;
3632         jj++;
3633       }
3634       olens[i] = olen;
3635       dlens[i] = dlen;
3636     }
3637     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3638     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3639     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3640     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3641     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3642     ierr = PetscFree(dlens);CHKERRQ(ierr);
3643   } else {
3644     PetscInt ml,nl;
3645 
3646     M = *newmat;
3647     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3648     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3649     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3650     /*
3651          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3652        rather than the slower MatSetValues().
3653     */
3654     M->was_assembled = PETSC_TRUE;
3655     M->assembled     = PETSC_FALSE;
3656   }
3657   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3658   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3659   ii  = aij->i;
3660   jj  = aij->j;
3661   aa  = aij->a;
3662   for (i=0; i<m; i++) {
3663     row   = rstart + i;
3664     nz    = ii[i+1] - ii[i];
3665     cwork = jj;     jj += nz;
3666     vwork = aa;     aa += nz;
3667     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3668   }
3669 
3670   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3671   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3672   *newmat = M;
3673 
3674   /* save submatrix used in processor for next request */
3675   if (call ==  MAT_INITIAL_MATRIX) {
3676     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3677     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3678   }
3679 
3680   PetscFunctionReturn(0);
3681 }
3682 
3683 EXTERN_C_BEGIN
3684 #undef __FUNCT__
3685 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3686 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3687 {
3688   PetscInt       m,cstart, cend,j,nnz,i,d;
3689   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3690   const PetscInt *JJ;
3691   PetscScalar    *values;
3692   PetscErrorCode ierr;
3693 
3694   PetscFunctionBegin;
3695   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3696 
3697   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3698   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3699   m      = B->rmap->n;
3700   cstart = B->cmap->rstart;
3701   cend   = B->cmap->rend;
3702   rstart = B->rmap->rstart;
3703 
3704   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3705 
3706 #if defined(PETSC_USE_DEBUG)
3707   for (i=0; i<m; i++) {
3708     nnz     = Ii[i+1]- Ii[i];
3709     JJ      = J + Ii[i];
3710     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3711     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3712     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3713   }
3714 #endif
3715 
3716   for (i=0; i<m; i++) {
3717     nnz     = Ii[i+1]- Ii[i];
3718     JJ      = J + Ii[i];
3719     nnz_max = PetscMax(nnz_max,nnz);
3720     d       = 0;
3721     for (j=0; j<nnz; j++) {
3722       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3723     }
3724     d_nnz[i] = d;
3725     o_nnz[i] = nnz - d;
3726   }
3727   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3728   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3729 
3730   if (v) values = (PetscScalar*)v;
3731   else {
3732     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3733     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3734   }
3735 
3736   for (i=0; i<m; i++) {
3737     ii   = i + rstart;
3738     nnz  = Ii[i+1]- Ii[i];
3739     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3740   }
3741   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743 
3744   if (!v) {
3745     ierr = PetscFree(values);CHKERRQ(ierr);
3746   }
3747   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3748   PetscFunctionReturn(0);
3749 }
3750 EXTERN_C_END
3751 
3752 #undef __FUNCT__
3753 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3754 /*@
3755    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3756    (the default parallel PETSc format).
3757 
3758    Collective on MPI_Comm
3759 
3760    Input Parameters:
3761 +  B - the matrix
3762 .  i - the indices into j for the start of each local row (starts with zero)
3763 .  j - the column indices for each local row (starts with zero)
3764 -  v - optional values in the matrix
3765 
3766    Level: developer
3767 
3768    Notes:
3769        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3770      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3771      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3772 
3773        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3774 
3775        The format used for the sparse matrix input is equivalent to a
3776     row-major ordering, i.e. for the following matrix, the expected input data is
3777     as shown:
3778 
3779         1 0 0
3780         2 0 3     P0
3781        -------
3782         4 5 6     P1
3783 
3784      Process0 [P0]: rows_owned=[0,1]
3785         i =  {0,1,3}  [size = nrow+1  = 2+1]
3786         j =  {0,0,2}  [size = nz = 3]
3787         v =  {1,2,3}  [size = nz = 3]
3788 
3789      Process1 [P1]: rows_owned=[2]
3790         i =  {0,3}    [size = nrow+1  = 1+1]
3791         j =  {0,1,2}  [size = nz = 3]
3792         v =  {4,5,6}  [size = nz = 3]
3793 
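     A minimal calling sketch, assuming the local arrays i, j, v have been filled as in the
     example above; the communicator and the local sizes nlocalrows, nlocalcols are illustrative
     placeholders, not part of this routine's interface:

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);  /* preallocates and inserts the local CSR data */
.ve
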
3794 .keywords: matrix, aij, compressed row, sparse, parallel
3795 
3796 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3797           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3798 @*/
3799 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3800 {
3801   PetscErrorCode ierr;
3802 
3803   PetscFunctionBegin;
3804   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3805   PetscFunctionReturn(0);
3806 }
3807 
3808 #undef __FUNCT__
3809 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3810 /*@C
3811    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3812    (the default parallel PETSc format).  For good matrix assembly performance
3813    the user should preallocate the matrix storage by setting the parameters
3814    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3815    performance can be increased by more than a factor of 50.
3816 
3817    Collective on MPI_Comm
3818 
3819    Input Parameters:
3820 +  A - the matrix
3821 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3822            (same value is used for all local rows)
3823 .  d_nnz - array containing the number of nonzeros in the various rows of the
3824            DIAGONAL portion of the local submatrix (possibly different for each row)
3825            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3826            The size of this array is equal to the number of local rows, i.e 'm'.
3827            For matrices that will be factored, you must leave room for (and set)
3828            the diagonal entry even if it is zero.
3829 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3830            submatrix (same value is used for all local rows).
3831 -  o_nnz - array containing the number of nonzeros in the various rows of the
3832            OFF-DIAGONAL portion of the local submatrix (possibly different for
3833            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3834            structure. The size of this array is equal to the number
3835            of local rows, i.e 'm'.
3836 
3837    If the *_nnz parameter is given then the *_nz parameter is ignored
3838 
3839    The AIJ format (also called the Yale sparse matrix format or
3840    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3841    storage.  The stored row and column indices begin with zero.
3842    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3843 
3844    The parallel matrix is partitioned such that the first m0 rows belong to
3845    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3846    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3847 
3848    The DIAGONAL portion of the local submatrix of a processor can be defined
3849    as the submatrix which is obtained by extracting the part corresponding to
3850    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3851    first row that belongs to the processor, r2 is the last row belonging to
3852    this processor, and c1-c2 is the range of indices of the local part of a
3853    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3854    common case of a square matrix, the row and column ranges are the same and
3855    the DIAGONAL part is also square. The remaining portion of the local
3856    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3857 
3858    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3859 
3860    You can call MatGetInfo() to get information on how effective the preallocation was;
3861    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3862    You can also run with the option -info and look for messages with the string
3863    malloc in them to see if additional memory allocation was needed.
3864 
3865    Example usage:
3866 
3867    Consider the following 8x8 matrix with 34 non-zero values, that is
3868    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3869    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3870    as follows:
3871 
3872 .vb
3873             1  2  0  |  0  3  0  |  0  4
3874     Proc0   0  5  6  |  7  0  0  |  8  0
3875             9  0 10  | 11  0  0  | 12  0
3876     -------------------------------------
3877            13  0 14  | 15 16 17  |  0  0
3878     Proc1   0 18  0  | 19 20 21  |  0  0
3879             0  0  0  | 22 23  0  | 24  0
3880     -------------------------------------
3881     Proc2  25 26 27  |  0  0 28  | 29  0
3882            30  0  0  | 31 32 33  |  0 34
3883 .ve
3884 
3885    This can be represented as a collection of submatrices as:
3886 
3887 .vb
3888       A B C
3889       D E F
3890       G H I
3891 .ve
3892 
3893    Where the submatrices A,B,C are owned by proc0, D,E,F are
3894    owned by proc1, G,H,I are owned by proc2.
3895 
3896    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3897    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3898    The 'M','N' parameters are 8,8, and have the same values on all procs.
3899 
3900    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3901    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3902    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3903    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3904    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3905    matrix, and [DF] as another SeqAIJ matrix.
3906 
3907    When d_nz, o_nz parameters are specified, d_nz storage elements are
3908    allocated for every row of the local diagonal submatrix, and o_nz
3909    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3910    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3911    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3912    In this case, the values of d_nz,o_nz are:
3913 .vb
3914      proc0 : d_nz = 2, o_nz = 2
3915      proc1 : d_nz = 3, o_nz = 2
3916      proc2 : d_nz = 1, o_nz = 4
3917 .ve
3918    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3919    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3920    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3921    34 values.
3922 
3923    When d_nnz, o_nnz parameters are specified, the storage is specified
3924    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3925    In the above case the values for d_nnz,o_nnz are:
3926 .vb
3927      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3928      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3929      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3930 .ve
3931    Here the space allocated is the sum of all the above values, i.e. 34, and
3932    hence the preallocation is exact.
3933 
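     As a sketch of the corresponding call, using the per-row counts given above for proc0;
     the array names d_nnz and o_nnz here are illustrative local variables:

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};  /* proc0 of the example above */
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);     /* d_nz and o_nz are ignored when *_nnz is given */
.ve
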
3934    Level: intermediate
3935 
3936 .keywords: matrix, aij, compressed row, sparse, parallel
3937 
3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3939           MPIAIJ, MatGetInfo()
3940 @*/
3941 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3942 {
3943   PetscErrorCode ierr;
3944 
3945   PetscFunctionBegin;
3946   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3947   PetscValidType(B,1);
3948   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3949   PetscFunctionReturn(0);
3950 }
3951 
3952 #undef __FUNCT__
3953 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3954 /*@
3955      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3956          rows in standard CSR format.
3957 
3958    Collective on MPI_Comm
3959 
3960    Input Parameters:
3961 +  comm - MPI communicator
3962 .  m - number of local rows (Cannot be PETSC_DECIDE)
3963 .  n - This value should be the same as the local size used in creating the
3964        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3965        calculated if N is given) For square matrices n is almost always m.
3966 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3967 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3968 .   i - row indices
3969 .   j - column indices
3970 -   a - matrix values
3971 
3972    Output Parameter:
3973 .   mat - the matrix
3974 
3975    Level: intermediate
3976 
3977    Notes:
3978        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3979      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3980      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3981 
3982        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3983 
3984        The format used for the sparse matrix input is equivalent to a
3985     row-major ordering, i.e. for the following matrix, the expected input data is
3986     as shown:
3987 
3988         1 0 0
3989         2 0 3     P0
3990        -------
3991         4 5 6     P1
3992 
3993      Process0 [P0]: rows_owned=[0,1]
3994         i =  {0,1,3}  [size = nrow+1  = 2+1]
3995         j =  {0,0,2}  [size = nz = 3]
3996         v =  {1,2,3}  [size = nz = 3]
3997 
3998      Process1 [P1]: rows_owned=[2]
3999         i =  {0,3}    [size = nrow+1  = 1+1]
4000         j =  {0,1,2}  [size = nz = 3]
4001         v =  {4,5,6}  [size = nz = 3]
4002 
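       As a sketch using the two-process example above, the call on process 0 (each process
     makes the analogous call with its own rows) could look like:
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
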
4003 .keywords: matrix, aij, compressed row, sparse, parallel
4004 
4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4006           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4007 @*/
4008 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4009 {
4010   PetscErrorCode ierr;
4011 
4012   PetscFunctionBegin;
4013   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4014   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4015   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4016   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4017   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4018   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4019   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4020   PetscFunctionReturn(0);
4021 }
4022 
4023 #undef __FUNCT__
4024 #define __FUNCT__ "MatCreateAIJ"
4025 /*@C
4026    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4027    (the default parallel PETSc format).  For good matrix assembly performance
4028    the user should preallocate the matrix storage by setting the parameters
4029    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4030    performance can be increased by more than a factor of 50.
4031 
4032    Collective on MPI_Comm
4033 
4034    Input Parameters:
4035 +  comm - MPI communicator
4036 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4037            This value should be the same as the local size used in creating the
4038            y vector for the matrix-vector product y = Ax.
4039 .  n - This value should be the same as the local size used in creating the
4040        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4041        calculated if N is given) For square matrices n is almost always m.
4042 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4043 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4044 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4045            (same value is used for all local rows)
4046 .  d_nnz - array containing the number of nonzeros in the various rows of the
4047            DIAGONAL portion of the local submatrix (possibly different for each row)
4048            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4049            The size of this array is equal to the number of local rows, i.e 'm'.
4050 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4051            submatrix (same value is used for all local rows).
4052 -  o_nnz - array containing the number of nonzeros in the various rows of the
4053            OFF-DIAGONAL portion of the local submatrix (possibly different for
4054            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4055            structure. The size of this array is equal to the number
4056            of local rows, i.e 'm'.
4057 
4058    Output Parameter:
4059 .  A - the matrix
4060 
4061    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4062    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4063    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4064 
4065    Notes:
4066    If the *_nnz parameter is given then the *_nz parameter is ignored
4067 
4068    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4069    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4070    storage requirements for this matrix.
4071 
4072    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4073    processor then it must be used on all processors that share the object for
4074    that argument.
4075 
4076    The user MUST specify either the local or global matrix dimensions
4077    (possibly both).
4078 
4079    The parallel matrix is partitioned across processors such that the
4080    first m0 rows belong to process 0, the next m1 rows belong to
4081    process 1, the next m2 rows belong to process 2, etc., where
4082    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4083    values corresponding to an [m x N] submatrix.
4084 
4085    The columns are logically partitioned with the n0 columns belonging
4086    to the 0th partition, the next n1 columns belonging to the next
4087    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4088 
4089    The DIAGONAL portion of the local submatrix on any given processor
4090    is the submatrix corresponding to the rows and columns m,n
4091    owned by the given processor, i.e. the diagonal submatrix on
4092    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4093    etc. The remaining [m x (N-n)] portion of the local submatrix
4094    constitutes the OFF-DIAGONAL portion. The example below
4095    illustrates this concept.
4096 
4097    For a square global matrix we define each processor's diagonal portion
4098    to be its local rows and the corresponding columns (a square submatrix);
4099    each processor's off-diagonal portion encompasses the remainder of the
4100    local matrix (a rectangular submatrix).
4101 
4102    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4103 
4104    When calling this routine with a single process communicator, a matrix of
4105    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4106    type of communicator, use the construction mechanism:
4107      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4108 
4109    By default, this format uses inodes (identical nodes) when possible.
4110    We search for consecutive rows with the same nonzero structure, thereby
4111    reusing matrix information to achieve increased efficiency.
4112 
4113    Options Database Keys:
4114 +  -mat_no_inode  - Do not use inodes
4115 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4116 -  -mat_aij_oneindex - Internally use indexing starting at 1
4117         rather than 0.  Note that when calling MatSetValues(),
4118         the user still MUST index entries starting at 0!
4119 
4120 
4121    Example usage:
4122 
4123    Consider the following 8x8 matrix with 34 non-zero values that is
4124    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4125    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4126    as follows:
4127 
4128 .vb
4129             1  2  0  |  0  3  0  |  0  4
4130     Proc0   0  5  6  |  7  0  0  |  8  0
4131             9  0 10  | 11  0  0  | 12  0
4132     -------------------------------------
4133            13  0 14  | 15 16 17  |  0  0
4134     Proc1   0 18  0  | 19 20 21  |  0  0
4135             0  0  0  | 22 23  0  | 24  0
4136     -------------------------------------
4137     Proc2  25 26 27  |  0  0 28  | 29  0
4138            30  0  0  | 31 32 33  |  0 34
4139 .ve
4140 
4141    This can be represented as a collection of submatrices as:
4142 
4143 .vb
4144       A B C
4145       D E F
4146       G H I
4147 .ve
4148 
4149    Where the submatrices A,B,C are owned by proc0, D,E,F are
4150    owned by proc1, G,H,I are owned by proc2.
4151 
4152    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4153    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4154    The 'M','N' parameters are 8,8, and have the same values on all procs.
4155 
4156    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4157    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4158    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4159    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4160    part as separate SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4161    matrix, and [DF] as another SeqAIJ matrix.
4162 
4163    When d_nz, o_nz parameters are specified, d_nz storage elements are
4164    allocated for every row of the local diagonal submatrix, and o_nz
4165    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4166    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4167    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4168    In this case, the values of d_nz,o_nz are:
4169 .vb
4170      proc0 : d_nz = 2, o_nz = 2
4171      proc1 : d_nz = 3, o_nz = 2
4172      proc2 : d_nz = 1, o_nz = 4
4173 .ve
4174    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4175    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4176    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4177    34 values.
4178 
4179    When the d_nnz, o_nnz parameters are specified, the storage is specified
4180    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4181    In the above case the values for d_nnz,o_nnz are:
4182 .vb
4183      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4184      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4185      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4186 .ve
4187    Here the space allocated is the sum of all the above values, i.e. 34, and
4188    hence the preallocation is exact.
4189 
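   As a sketch (assuming the 8x8 example above; dnnz and onnz are illustrative array names
   holding the per-row values listed, and each process makes the analogous call), the call
   on proc0 might look like:
.vb
     Mat      A;
     PetscInt dnnz[3] = {2,2,2}, onnz[3] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,dnnz,0,onnz,&A);
.ve
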
4190    Level: intermediate
4191 
4192 .keywords: matrix, aij, compressed row, sparse, parallel
4193 
4194 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4195           MPIAIJ, MatCreateMPIAIJWithArrays()
4196 @*/
4197 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4198 {
4199   PetscErrorCode ierr;
4200   PetscMPIInt    size;
4201 
4202   PetscFunctionBegin;
4203   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4204   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4205   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4206   if (size > 1) {
4207     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4208     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4209   } else {
4210     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4211     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4212   }
4213   PetscFunctionReturn(0);
4214 }
4215 
4216 #undef __FUNCT__
4217 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4218 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4219 {
4220   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4221 
4222   PetscFunctionBegin;
4223   *Ad     = a->A;
4224   *Ao     = a->B;
4225   *colmap = a->garray;
4226   PetscFunctionReturn(0);
4227 }
4228 
4229 #undef __FUNCT__
4230 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4231 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4232 {
4233   PetscErrorCode ierr;
4234   PetscInt       i;
4235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4236 
4237   PetscFunctionBegin;
4238   if (coloring->ctype == IS_COLORING_GLOBAL) {
4239     ISColoringValue *allcolors,*colors;
4240     ISColoring      ocoloring;
4241 
4242     /* set coloring for diagonal portion */
4243     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4244 
4245     /* set coloring for off-diagonal portion */
4246     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4247     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4248     for (i=0; i<a->B->cmap->n; i++) {
4249       colors[i] = allcolors[a->garray[i]];
4250     }
4251     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4252     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4253     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4254     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4255   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4256     ISColoringValue *colors;
4257     PetscInt        *larray;
4258     ISColoring      ocoloring;
4259 
4260     /* set coloring for diagonal portion */
4261     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4262     for (i=0; i<a->A->cmap->n; i++) {
4263       larray[i] = i + A->cmap->rstart;
4264     }
4265     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4266     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4267     for (i=0; i<a->A->cmap->n; i++) {
4268       colors[i] = coloring->colors[larray[i]];
4269     }
4270     ierr = PetscFree(larray);CHKERRQ(ierr);
4271     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4272     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4273     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4274 
4275     /* set coloring for off-diagonal portion */
4276     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4277     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4278     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4279     for (i=0; i<a->B->cmap->n; i++) {
4280       colors[i] = coloring->colors[larray[i]];
4281     }
4282     ierr = PetscFree(larray);CHKERRQ(ierr);
4283     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4284     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4285     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4286   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4287 
4288   PetscFunctionReturn(0);
4289 }
4290 
4291 #if defined(PETSC_HAVE_ADIC)
4292 #undef __FUNCT__
4293 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4294 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4295 {
4296   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4297   PetscErrorCode ierr;
4298 
4299   PetscFunctionBegin;
4300   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4301   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4302   PetscFunctionReturn(0);
4303 }
4304 #endif
4305 
4306 #undef __FUNCT__
4307 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4308 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4309 {
4310   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4311   PetscErrorCode ierr;
4312 
4313   PetscFunctionBegin;
4314   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4315   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4316   PetscFunctionReturn(0);
4317 }
4318 
4319 #undef __FUNCT__
4320 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4321 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4322 {
4323   PetscErrorCode ierr;
4324   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4325   PetscInt       *indx;
4326 
4327   PetscFunctionBegin;
4328   /* This routine will ONLY return MPIAIJ type matrix */
4329   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4330   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4331   if (n == PETSC_DECIDE){
4332     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4333   }
4334   /* Check sum(n) = N */
4335   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4336   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4337 
4338   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4339   rstart -= m;
4340 
4341   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4342   for (i=0;i<m;i++) {
4343     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4344     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4345     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4346   }
4347 
4348   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4349   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4350   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4351   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4352   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4353   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4354   PetscFunctionReturn(0);
4355 }
4356 
4357 #undef __FUNCT__
4358 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4359 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4360 {
4361   PetscErrorCode ierr;
4362   PetscInt       m,N,i,rstart,nnz,Ii;
4363   PetscInt       *indx;
4364   PetscScalar    *values;
4365 
4366   PetscFunctionBegin;
4367   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4368   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4369   for (i=0;i<m;i++) {
4370     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4371     Ii    = i + rstart;
4372     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4373     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4374   }
4375   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4376   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 #undef __FUNCT__
4381 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4382 /*@
4383       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4384                  matrices from each processor
4385 
4386     Collective on MPI_Comm
4387 
4388    Input Parameters:
4389 +    comm - the communicator the parallel matrix will live on
4390 .    inmat - the input sequential matrix on each process
4391 .    n - number of local columns (or PETSC_DECIDE)
4392 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4393 
4394    Output Parameter:
4395 .    outmat - the parallel matrix generated
4396 
4397     Level: advanced
4398 
4399    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4400 
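   As a sketch (assuming each process has already assembled a SeqAIJ matrix seqA with the same
   number of columns), the concatenated parallel matrix could be obtained with:
.vb
     Mat parA;

     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve
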
4401 @*/
4402 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4403 {
4404   PetscErrorCode ierr;
4405 
4406   PetscFunctionBegin;
4407   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4408   if (scall == MAT_INITIAL_MATRIX){
4409     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4410   }
4411   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4412   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatFileSplit"
4418 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4419 {
4420   PetscErrorCode    ierr;
4421   PetscMPIInt       rank;
4422   PetscInt          m,N,i,rstart,nnz;
4423   size_t            len;
4424   const PetscInt    *indx;
4425   PetscViewer       out;
4426   char              *name;
4427   Mat               B;
4428   const PetscScalar *values;
4429 
4430   PetscFunctionBegin;
4431   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4432   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4433   /* Should this be the type of the diagonal block of A? */
4434   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4435   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4436   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4437   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4438   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4439   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4440   for (i=0;i<m;i++) {
4441     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4442     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4443     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4444   }
4445   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4446   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4447 
4448   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4449   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4450   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4451   sprintf(name,"%s.%d",outfile,rank);
4452   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4453   ierr = PetscFree(name);CHKERRQ(ierr);
4454   ierr = MatView(B,out);CHKERRQ(ierr);
4455   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4456   ierr = MatDestroy(&B);CHKERRQ(ierr);
4457   PetscFunctionReturn(0);
4458 }
4459 
4460 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4461 #undef __FUNCT__
4462 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4463 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4464 {
4465   PetscErrorCode       ierr;
4466   Mat_Merge_SeqsToMPI  *merge;
4467   PetscContainer       container;
4468 
4469   PetscFunctionBegin;
4470   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4471   if (container) {
4472     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4485     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4486     ierr = PetscFree(merge);CHKERRQ(ierr);
4487     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4488   }
4489   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
4492 
4493 #include <../src/mat/utils/freespace.h>
4494 #include <petscbt.h>
4495 
4496 #undef __FUNCT__
4497 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4498 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4499 {
4500   PetscErrorCode       ierr;
4501   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4502   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4503   PetscMPIInt          size,rank,taga,*len_s;
4504   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4505   PetscInt             proc,m;
4506   PetscInt             **buf_ri,**buf_rj;
4507   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4508   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4509   MPI_Request          *s_waits,*r_waits;
4510   MPI_Status           *status;
4511   MatScalar            *aa=a->a;
4512   MatScalar            **abuf_r,*ba_i;
4513   Mat_Merge_SeqsToMPI  *merge;
4514   PetscContainer       container;
4515 
4516   PetscFunctionBegin;
4517   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4518 
4519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4520   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4521 
4522   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4523   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4524 
4525   bi     = merge->bi;
4526   bj     = merge->bj;
4527   buf_ri = merge->buf_ri;
4528   buf_rj = merge->buf_rj;
4529 
4530   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4531   owners = merge->rowmap->range;
4532   len_s  = merge->len_s;
4533 
4534   /* send and recv matrix values */
4535   /*-----------------------------*/
4536   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4537   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4538 
4539   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4540   for (proc=0,k=0; proc<size; proc++){
4541     if (!len_s[proc]) continue;
4542     i = owners[proc];
4543     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4544     k++;
4545   }
4546 
4547   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4548   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4549   ierr = PetscFree(status);CHKERRQ(ierr);
4550 
4551   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4552   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4553 
4554   /* insert mat values of mpimat */
4555   /*----------------------------*/
4556   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4557   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4558 
4559   for (k=0; k<merge->nrecv; k++){
4560     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4561     nrows = *(buf_ri_k[k]);
4562     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4563     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4564   }
4565 
4566   /* set values of ba */
4567   m = merge->rowmap->n;
4568   for (i=0; i<m; i++) {
4569     arow = owners[rank] + i;
4570     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4571     bnzi = bi[i+1] - bi[i];
4572     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4573 
4574     /* add local non-zero vals of this proc's seqmat into ba */
4575     anzi = ai[arow+1] - ai[arow];
4576     aj   = a->j + ai[arow];
4577     aa   = a->a + ai[arow];
4578     nextaj = 0;
4579     for (j=0; nextaj<anzi; j++){
4580       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4581         ba_i[j] += aa[nextaj++];
4582       }
4583     }
4584 
4585     /* add received vals into ba */
4586     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4587       /* i-th row */
4588       if (i == *nextrow[k]) {
4589         anzi = *(nextai[k]+1) - *nextai[k];
4590         aj   = buf_rj[k] + *(nextai[k]);
4591         aa   = abuf_r[k] + *(nextai[k]);
4592         nextaj = 0;
4593         for (j=0; nextaj<anzi; j++){
4594           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4595             ba_i[j] += aa[nextaj++];
4596           }
4597         }
4598         nextrow[k]++; nextai[k]++;
4599       }
4600     }
4601     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4602   }
4603   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4604   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4605 
4606   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4607   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4608   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4609   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4610   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4611   PetscFunctionReturn(0);
4612 }
4613 
4614 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4615 
4616 #undef __FUNCT__
4617 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4618 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4619 {
4620   PetscErrorCode       ierr;
4621   Mat                  B_mpi;
4622   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4623   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4624   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4625   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4626   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4627   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4628   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4629   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4630   MPI_Status           *status;
4631   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4632   PetscBT              lnkbt;
4633   Mat_Merge_SeqsToMPI  *merge;
4634   PetscContainer       container;
4635 
4636   PetscFunctionBegin;
4637   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4638 
4639   /* make sure it is a PETSc comm */
4640   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4641   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4642   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4643 
4644   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4645   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4646 
4647   /* determine row ownership */
4648   /*---------------------------------------------------------*/
4649   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4650   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4651   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4652   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4653   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4654   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4655   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4656 
4657   m      = merge->rowmap->n;
4658   M      = merge->rowmap->N;
4659   owners = merge->rowmap->range;
4660 
4661   /* determine the number of messages to send, their lengths */
4662   /*---------------------------------------------------------*/
4663   len_s  = merge->len_s;
4664 
4665   len = 0;  /* length of buf_si[] */
4666   merge->nsend = 0;
4667   for (proc=0; proc<size; proc++){
4668     len_si[proc] = 0;
4669     if (proc == rank){
4670       len_s[proc] = 0;
4671     } else {
4672       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4673       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4674     }
4675     if (len_s[proc]) {
4676       merge->nsend++;
4677       nrows = 0;
4678       for (i=owners[proc]; i<owners[proc+1]; i++){
4679         if (ai[i+1] > ai[i]) nrows++;
4680       }
4681       len_si[proc] = 2*(nrows+1);
4682       len += len_si[proc];
4683     }
4684   }
4685 
4686   /* determine the number and length of messages to receive for ij-structure */
4687   /*-------------------------------------------------------------------------*/
4688   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4689   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4690 
4691   /* post the Irecv of j-structure */
4692   /*-------------------------------*/
4693   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4694   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4695 
4696   /* post the Isend of j-structure */
4697   /*--------------------------------*/
4698   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4699 
4700   for (proc=0, k=0; proc<size; proc++){
4701     if (!len_s[proc]) continue;
4702     i = owners[proc];
4703     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4704     k++;
4705   }
4706 
4707   /* receives and sends of j-structure are complete */
4708   /*------------------------------------------------*/
4709   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4710   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4711 
4712   /* send and recv i-structure */
4713   /*---------------------------*/
4714   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4715   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4716 
4717   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4718   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4719   for (proc=0,k=0; proc<size; proc++){
4720     if (!len_s[proc]) continue;
4721     /* form outgoing message for i-structure:
4722          buf_si[0]:                 nrows to be sent
4723                [1:nrows]:           row index (global)
4724                [nrows+1:2*nrows+1]: i-structure index
4725     */
4726     /*-------------------------------------------*/
4727     nrows = len_si[proc]/2 - 1;
4728     buf_si_i    = buf_si + nrows+1;
4729     buf_si[0]   = nrows;
4730     buf_si_i[0] = 0;
4731     nrows = 0;
4732     for (i=owners[proc]; i<owners[proc+1]; i++){
4733       anzi = ai[i+1] - ai[i];
4734       if (anzi) {
4735         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4736         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4737         nrows++;
4738       }
4739     }
4740     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4741     k++;
4742     buf_si += len_si[proc];
4743   }
4744 
4745   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4746   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4747 
4748   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4749   for (i=0; i<merge->nrecv; i++){
4750     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4751   }
4752 
4753   ierr = PetscFree(len_si);CHKERRQ(ierr);
4754   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4755   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4756   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4757   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4758   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4759   ierr = PetscFree(status);CHKERRQ(ierr);
4760 
4761   /* compute a local seq matrix in each processor */
4762   /*----------------------------------------------*/
4763   /* allocate bi array and free space for accumulating nonzero column info */
4764   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4765   bi[0] = 0;
4766 
4767   /* create and initialize a linked list */
4768   nlnk = N+1;
4769   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4770 
4771   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4772   len = 0;
4773   len  = ai[owners[rank+1]] - ai[owners[rank]];
4774   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4775   current_space = free_space;
4776 
4777   /* determine symbolic info for each local row */
4778   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4779 
4780   for (k=0; k<merge->nrecv; k++){
4781     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4782     nrows = *buf_ri_k[k];
4783     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4784     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4785   }
4786 
4787   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4788   len = 0;
4789   for (i=0;i<m;i++) {
4790     bnzi   = 0;
4791     /* add local non-zero cols of this proc's seqmat into lnk */
4792     arow   = owners[rank] + i;
4793     anzi   = ai[arow+1] - ai[arow];
4794     aj     = a->j + ai[arow];
4795     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4796     bnzi += nlnk;
4797     /* add received col data into lnk */
4798     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4799       if (i == *nextrow[k]) { /* i-th row */
4800         anzi = *(nextai[k]+1) - *nextai[k];
4801         aj   = buf_rj[k] + *nextai[k];
4802         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803         bnzi += nlnk;
4804         nextrow[k]++; nextai[k]++;
4805       }
4806     }
4807     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4808 
4809     /* if free space is not available, make more free space */
4810     if (current_space->local_remaining<bnzi) {
4811       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4812       nspacedouble++;
4813     }
4814     /* copy data into free space, then initialize lnk */
4815     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4816     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4817 
4818     current_space->array           += bnzi;
4819     current_space->local_used      += bnzi;
4820     current_space->local_remaining -= bnzi;
4821 
4822     bi[i+1] = bi[i] + bnzi;
4823   }
4824 
4825   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4826 
4827   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4828   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4829   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4830 
4831   /* create symbolic parallel matrix B_mpi */
4832   /*---------------------------------------*/
4833   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4834   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4835   if (n==PETSC_DECIDE) {
4836     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4837   } else {
4838     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4839   }
4840   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4841   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4842   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4843   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4844   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4845 
4846   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4847   B_mpi->assembled     = PETSC_FALSE;
4848   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4849   merge->bi            = bi;
4850   merge->bj            = bj;
4851   merge->buf_ri        = buf_ri;
4852   merge->buf_rj        = buf_rj;
4853   merge->coi           = PETSC_NULL;
4854   merge->coj           = PETSC_NULL;
4855   merge->owners_co     = PETSC_NULL;
4856 
4857   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4858 
4859   /* attach the supporting struct to B_mpi for reuse */
4860   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4861   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4862   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4863   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4864   *mpimat = B_mpi;
4865 
4866   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4867   PetscFunctionReturn(0);
4868 }
4869 
4870 #undef __FUNCT__
4871 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4872 /*@C
4873       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4874                  matrices from each processor
4875 
4876     Collective on MPI_Comm
4877 
4878    Input Parameters:
4879 +    comm - the communicator the parallel matrix will live on
4880 .    seqmat - the input sequential matrix on each process
4881 .    m - number of local rows (or PETSC_DECIDE)
4882 .    n - number of local columns (or PETSC_DECIDE)
4883 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884 
4885    Output Parameter:
4886 .    mpimat - the parallel matrix generated
4887 
4888     Level: advanced
4889 
4890    Notes:
4891      The dimensions of the sequential matrix in each processor MUST be the same.
4892      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4893      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
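
   As a sketch (assuming each process has assembled a SeqAIJ matrix seqA with identical global
   dimensions), the summed parallel matrix could be obtained with:
.vb
     Mat mpiA;

     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve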
4894 @*/
4895 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4896 {
4897   PetscErrorCode   ierr;
4898   PetscMPIInt     size;
4899 
4900   PetscFunctionBegin;
4901   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4902   if (size == 1){
4903      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4904      if (scall == MAT_INITIAL_MATRIX){
4905        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4906      } else {
4907        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4908      }
4909      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910      PetscFunctionReturn(0);
4911   }
4912   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4913   if (scall == MAT_INITIAL_MATRIX){
4914     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4915   }
4916   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4917   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4918   PetscFunctionReturn(0);
4919 }
4920 
4921 #undef __FUNCT__
4922 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4923 /*@
4924      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4925           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4926           with MatGetSize()
4927 
4928     Not Collective
4929 
4930    Input Parameters:
4931 +    A - the matrix
4932 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4933 
4934    Output Parameter:
4935 .    A_loc - the local sequential matrix generated
4936 
4937     Level: developer
4938 
4939 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4940 
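   A sketch of typical use (the reuse call assumes the nonzero pattern of A is unchanged):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... A's values change ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
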
4941 @*/
4942 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4943 {
4944   PetscErrorCode  ierr;
4945   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4946   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4947   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4948   MatScalar       *aa=a->a,*ba=b->a,*cam;
4949   PetscScalar     *ca;
4950   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4951   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4952   PetscBool       match;
4953 
4954   PetscFunctionBegin;
4955   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4956   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4957   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4958   if (scall == MAT_INITIAL_MATRIX){
4959     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4960     ci[0] = 0;
4961     for (i=0; i<am; i++){
4962       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4963     }
4964     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4965     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4966     k = 0;
4967     for (i=0; i<am; i++) {
4968       ncols_o = bi[i+1] - bi[i];
4969       ncols_d = ai[i+1] - ai[i];
4970       /* off-diagonal portion of A */
4971       for (jo=0; jo<ncols_o; jo++) {
4972         col = cmap[*bj];
4973         if (col >= cstart) break;
4974         cj[k]   = col; bj++;
4975         ca[k++] = *ba++;
4976       }
4977       /* diagonal portion of A */
4978       for (j=0; j<ncols_d; j++) {
4979         cj[k]   = cstart + *aj++;
4980         ca[k++] = *aa++;
4981       }
4982       /* off-diagonal portion of A */
4983       for (j=jo; j<ncols_o; j++) {
4984         cj[k]   = cmap[*bj++];
4985         ca[k++] = *ba++;
4986       }
4987     }
4988     /* put together the new matrix */
4989     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4990     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4991     /* Since these are PETSc arrays, change flags to free them as necessary. */
4992     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4993     mat->free_a  = PETSC_TRUE;
4994     mat->free_ij = PETSC_TRUE;
4995     mat->nonew   = 0;
4996   } else if (scall == MAT_REUSE_MATRIX){
4997     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4998     ci = mat->i; cj = mat->j; cam = mat->a;
4999     for (i=0; i<am; i++) {
5000       /* off-diagonal portion of A */
5001       ncols_o = bi[i+1] - bi[i];
5002       for (jo=0; jo<ncols_o; jo++) {
5003         col = cmap[*bj];
5004         if (col >= cstart) break;
5005         *cam++ = *ba++; bj++;
5006       }
5007       /* diagonal portion of A */
5008       ncols_d = ai[i+1] - ai[i];
5009       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5010       /* off-diagonal portion of A */
5011       for (j=jo; j<ncols_o; j++) {
5012         *cam++ = *ba++; bj++;
5013       }
5014     }
5015   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5016   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5017   PetscFunctionReturn(0);
5018 }
5019 
5020 #undef __FUNCT__
5021 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5022 /*@C
5023      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5024 
5025     Not Collective
5026 
5027    Input Parameters:
5028 +    A - the matrix
5029 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5030 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5031 
5032    Output Parameter:
5033 .    A_loc - the local sequential matrix generated
5034 
5035     Level: developer
5036 
5037 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5038 
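   A sketch of typical use (passing PETSC_NULL lets the routine build the row and column index sets itself):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
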
5039 @*/
5040 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5041 {
5042   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5043   PetscErrorCode    ierr;
5044   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5045   IS                isrowa,iscola;
5046   Mat               *aloc;
5047   PetscBool       match;
5048 
5049   PetscFunctionBegin;
5050   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5051   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5052   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5053   if (!row){
5054     start = A->rmap->rstart; end = A->rmap->rend;
5055     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5056   } else {
5057     isrowa = *row;
5058   }
5059   if (!col){
5060     start = A->cmap->rstart;
5061     cmap  = a->garray;
5062     nzA   = a->A->cmap->n;
5063     nzB   = a->B->cmap->n;
5064     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5065     ncols = 0;
5066     for (i=0; i<nzB; i++) {
5067       if (cmap[i] < start) idx[ncols++] = cmap[i];
5068       else break;
5069     }
5070     imark = i;
5071     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5072     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5073     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5074   } else {
5075     iscola = *col;
5076   }
5077   if (scall != MAT_INITIAL_MATRIX){
5078     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5079     aloc[0] = *A_loc;
5080   }
5081   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5082   *A_loc = aloc[0];
5083   ierr = PetscFree(aloc);CHKERRQ(ierr);
5084   if (!row){
5085     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5086   }
5087   if (!col){
5088     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5089   }
5090   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 #undef __FUNCT__
5095 #define __FUNCT__ "MatGetBrowsOfAcols"
5096 /*@C
5097     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5098 
5099     Collective on Mat
5100 
5101    Input Parameters:
5102 +    A,B - the matrices in mpiaij format
5103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5105 
5106    Output Parameter:
5107 +    rowb, colb - index sets of rows and columns of B to extract
5108 -    B_seq - the sequential matrix generated
5109 
5110     Level: developer
5111 
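   A sketch of typical use (the index sets created on the first call are reused on later calls):
.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat B_seq;

     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... later, with the same nonzero pattern ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
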
5112 @*/
5113 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5114 {
5115   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5116   PetscErrorCode    ierr;
5117   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5118   IS                isrowb,iscolb;
5119   Mat               *bseq=PETSC_NULL;
5120 
5121   PetscFunctionBegin;
5122   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5123     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5124   }
5125   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5126 
5127   if (scall == MAT_INITIAL_MATRIX){
5128     start = A->cmap->rstart;
5129     cmap  = a->garray;
5130     nzA   = a->A->cmap->n;
5131     nzB   = a->B->cmap->n;
5132     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5133     ncols = 0;
5134     for (i=0; i<nzB; i++) {  /* row < local row index */
5135       if (cmap[i] < start) idx[ncols++] = cmap[i];
5136       else break;
5137     }
5138     imark = i;
5139     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5140     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5141     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5142     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5143   } else {
5144     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5145     isrowb = *rowb; iscolb = *colb;
5146     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5147     bseq[0] = *B_seq;
5148   }
5149   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5150   *B_seq = bseq[0];
5151   ierr = PetscFree(bseq);CHKERRQ(ierr);
5152   if (!rowb){
5153     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5154   } else {
5155     *rowb = isrowb;
5156   }
5157   if (!colb){
5158     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5159   } else {
5160     *colb = iscolb;
5161   }
5162   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5163   PetscFunctionReturn(0);
5164 }
5165 
5166 #undef __FUNCT__
5167 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5168 /*
5169     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5170     of the OFF-DIAGONAL portion of the local part of A
5171 
5172     Collective on Mat
5173 
5174    Input Parameters:
5175 +    A,B - the matrices in mpiaij format
5176 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5177 
5178    Output Parameter:
5179 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5180 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5181 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5182 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5183 
5184     Level: developer
5185 
5186 */
5187 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5188 {
5189   VecScatter_MPI_General *gen_to,*gen_from;
5190   PetscErrorCode         ierr;
5191   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5192   Mat_SeqAIJ             *b_oth;
5193   VecScatter             ctx=a->Mvctx;
5194   MPI_Comm               comm=((PetscObject)ctx)->comm;
5195   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5196   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5197   PetscScalar            *rvalues,*svalues;
5198   MatScalar              *b_otha,*bufa,*bufA;
5199   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5200   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5201   MPI_Status             *sstatus,rstatus;
5202   PetscMPIInt            jj;
5203   PetscInt               *cols,sbs,rbs;
5204   PetscScalar            *vals;
5205 
5206   PetscFunctionBegin;
5207   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5208     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5209   }
5210   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5211   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5212 
5213   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5214   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5215   rvalues  = gen_from->values; /* holds the length of receiving row */
5216   svalues  = gen_to->values;   /* holds the length of sending row */
5217   nrecvs   = gen_from->n;
5218   nsends   = gen_to->n;
5219 
5220   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5221   srow     = gen_to->indices;   /* local row index to be sent */
5222   sstarts  = gen_to->starts;
5223   sprocs   = gen_to->procs;
5224   sstatus  = gen_to->sstatus;
5225   sbs      = gen_to->bs;
5226   rstarts  = gen_from->starts;
5227   rprocs   = gen_from->procs;
5228   rbs      = gen_from->bs;
5229 
5230   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5231   if (scall == MAT_INITIAL_MATRIX){
5232     /* i-array */
5233     /*---------*/
5234     /*  post receives */
5235     for (i=0; i<nrecvs; i++){
5236       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5237       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5238       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5239     }
5240 
5241     /* pack the outgoing message */
5242     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5243     sstartsj[0] = 0;  rstartsj[0] = 0;
5244     len = 0; /* total length of j or a array to be sent */
5245     k = 0;
5246     for (i=0; i<nsends; i++){
5247       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5248       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5249       for (j=0; j<nrows; j++) {
5250         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5251         for (l=0; l<sbs; l++){
5252           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5253           rowlen[j*sbs+l] = ncols;
5254           len += ncols;
5255           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5256         }
5257         k++;
5258       }
5259       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5260       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5261     }
5262     /* recvs and sends of i-array are completed */
5263     i = nrecvs;
5264     while (i--) {
5265       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5266     }
5267     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5268 
5269     /* allocate buffers for sending j and a arrays */
5270     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5271     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5272 
5273     /* create i-array of B_oth */
5274     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5275     b_othi[0] = 0;
5276     len = 0; /* total length of j or a array to be received */
5277     k = 0;
5278     for (i=0; i<nrecvs; i++){
5279       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5280       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5281       for (j=0; j<nrows; j++) {
5282         b_othi[k+1] = b_othi[k] + rowlen[j];
5283         len += rowlen[j]; k++;
5284       }
5285       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5286     }
5287 
5288     /* allocate space for j and a arrays of B_oth */
5289     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5290     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5291 
5292     /* j-array */
5293     /*---------*/
5294     /*  post receives of j-array */
5295     for (i=0; i<nrecvs; i++){
5296       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5297       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5298     }
5299 
5300     /* pack the outgoing message j-array */
5301     k = 0;
5302     for (i=0; i<nsends; i++){
5303       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5304       bufJ = bufj+sstartsj[i];
5305       for (j=0; j<nrows; j++) {
5306         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5307         for (ll=0; ll<sbs; ll++){
5308           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5309           for (l=0; l<ncols; l++){
5310             *bufJ++ = cols[l];
5311           }
5312           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5313         }
5314       }
5315       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5316     }
5317 
5318     /* recvs and sends of j-array are completed */
5319     i = nrecvs;
5320     while (i--) {
5321       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5322     }
5323     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5324   } else if (scall == MAT_REUSE_MATRIX){
5325     sstartsj = *startsj_s;
5326     rstartsj = *startsj_r;
5327     bufa     = *bufa_ptr;
5328     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5329     b_otha   = b_oth->a;
5330   } else {
5331     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5332   }
5333 
5334   /* a-array */
5335   /*---------*/
5336   /*  post receives of a-array */
5337   for (i=0; i<nrecvs; i++){
5338     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5339     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5340   }
5341 
5342   /* pack the outgoing message a-array */
5343   k = 0;
5344   for (i=0; i<nsends; i++){
5345     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5346     bufA = bufa+sstartsj[i];
5347     for (j=0; j<nrows; j++) {
5348       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5349       for (ll=0; ll<sbs; ll++){
5350         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5351         for (l=0; l<ncols; l++){
5352           *bufA++ = vals[l];
5353         }
5354         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5355       }
5356     }
5357     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5358   }
5359   /* recvs and sends of a-array are completed */
5360   i = nrecvs;
5361   while (i--) {
5362     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5363   }
5364   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5365   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5366 
5367   if (scall == MAT_INITIAL_MATRIX){
5368     /* put together the new matrix */
5369     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5370 
5371     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5372     /* Since these are PETSc arrays, change flags to free them as necessary. */
5373     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5374     b_oth->free_a  = PETSC_TRUE;
5375     b_oth->free_ij = PETSC_TRUE;
5376     b_oth->nonew   = 0;
5377 
5378     ierr = PetscFree(bufj);CHKERRQ(ierr);
5379     if (!startsj_s || !bufa_ptr){
5380       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5381       ierr = PetscFree(bufa);CHKERRQ(ierr); /* caller did not request the buffers, so free the local send buffer */
5382     } else {
5383       *startsj_s = sstartsj;
5384       *startsj_r = rstartsj;
5385       *bufa_ptr  = bufa;
5386     }
5387   }
5388   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5389   PetscFunctionReturn(0);
5390 }
5391 
5392 #undef __FUNCT__
5393 #define __FUNCT__ "MatGetCommunicationStructs"
5394 /*@C
5395   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5396 
5397   Not Collective
5398 
5399   Input Parameter:
5400 . A - The matrix, which must be of type MATMPIAIJ
5401 
5402   Output Parameters:
5403 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5404 . colmap - A map from global column index to local index into lvec
5405 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5406 
5407   Level: developer
5408 
5409 @*/
5410 #if defined (PETSC_USE_CTABLE)
5411 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5412 #else
5413 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5414 #endif
5415 {
5416   Mat_MPIAIJ *a;
5417 
5418   PetscFunctionBegin;
5419   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5420   PetscValidPointer(lvec, 2);
5421   PetscValidPointer(colmap, 3);
5422   PetscValidPointer(multScatter, 4);
5423   a = (Mat_MPIAIJ *) A->data;
5424   if (lvec) *lvec = a->lvec;
5425   if (colmap) *colmap = a->colmap;
5426   if (multScatter) *multScatter = a->Mvctx;
5427   PetscFunctionReturn(0);
5428 }
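
/*
   Editorial usage sketch (not part of PETSc): querying the communication structures of an
   assembled MATMPIAIJ matrix.  The guard macro PETSC_EDITORIAL_EXAMPLES and the function
   name exampleInspectMPIAIJComm are hypothetical and exist only for illustration.
*/
#if defined(PETSC_EDITORIAL_EXAMPLES)
static PetscErrorCode exampleInspectMPIAIJComm(Mat A)
{
  PetscErrorCode ierr;
  Vec            lvec;
  VecScatter     scatter;
#if defined(PETSC_USE_CTABLE)
  PetscTable     colmap;
#else
  PetscInt       *colmap;
#endif

  PetscFunctionBegin;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
  /* lvec, colmap and scatter are owned by the matrix; do not destroy them here */
  PetscFunctionReturn(0);
}
#endif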
5429 
5430 EXTERN_C_BEGIN
5431 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5432 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5433 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5434 EXTERN_C_END
5435 
5436 #undef __FUNCT__
5437 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5438 /*
5439     Computes C = A*B as (B'*A')', since computing the dense-times-sparse product A*B directly is untenable
5440 
5441                n                       p                          p
5442         (              )       (              )         (                  )
5443       m (      A       )  *  n (       B      )   =   m (         C        )
5444         (              )       (              )         (                  )
5445 
5446 */
5447 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5448 {
5449   PetscErrorCode     ierr;
5450   Mat                At,Bt,Ct;
5451 
5452   PetscFunctionBegin;
5453   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5454   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5455   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5456   ierr = MatDestroy(&At);CHKERRQ(ierr);
5457   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5458   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5459   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5460   PetscFunctionReturn(0);
5461 }
5462 
5463 #undef __FUNCT__
5464 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5465 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5466 {
5467   PetscErrorCode ierr;
5468   PetscInt       m=A->rmap->n,n=B->cmap->n;
5469   Mat            Cmat;
5470 
5471   PetscFunctionBegin;
5472   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5473   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5474   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5475   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5476   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5477   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5478   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5479   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5480   *C   = Cmat;
5481   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5482   PetscFunctionReturn(0);
5483 }
5484 
5485 /* ----------------------------------------------------------------*/
5486 #undef __FUNCT__
5487 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5488 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5489 {
5490   PetscErrorCode ierr;
5491 
5492   PetscFunctionBegin;
5493   if (scall == MAT_INITIAL_MATRIX){
5494     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5495   }
5496   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5497   PetscFunctionReturn(0);
5498 }
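
/*
   Editorial usage sketch (not part of PETSc): the three routines above are intended to be
   reached through the generic MatMatMult() interface when A has type MATMPIDENSE and B has
   type MATMPIAIJ.  The guard macro and function name below are hypothetical.
*/
#if defined(PETSC_EDITORIAL_EXAMPLES)
static PetscErrorCode exampleDenseTimesAIJ(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* C = A*B; PETSC_DEFAULT lets PETSc choose the fill estimate */
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif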
5499 
5500 EXTERN_C_BEGIN
5501 #if defined(PETSC_HAVE_MUMPS)
5502 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5503 #endif
5504 #if defined(PETSC_HAVE_PASTIX)
5505 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5506 #endif
5507 #if defined(PETSC_HAVE_SUPERLU_DIST)
5508 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5509 #endif
5510 #if defined(PETSC_HAVE_SPOOLES)
5511 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5512 #endif
5513 #if defined(PETSC_HAVE_CLIQUE)
5514 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5515 #endif
5516 EXTERN_C_END
5517 
5518 /*MC
5519    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5520 
5521    Options Database Keys:
5522 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5523 
5524   Level: beginner
5525 
5526 .seealso: MatCreateAIJ()
5527 M*/
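
/*
   Editorial usage sketch (not part of PETSc): creating a MATMPIAIJ matrix explicitly and
   preallocating 5 nonzeros per row in the diagonal block and 2 in the off-diagonal block.
   The guard macro and function name are hypothetical.
*/
#if defined(PETSC_EDITORIAL_EXAMPLES)
static PetscErrorCode exampleCreateMPIAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);   /* or -mat_type mpiaij with MatSetFromOptions() */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif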
5528 
5529 EXTERN_C_BEGIN
5530 #undef __FUNCT__
5531 #define __FUNCT__ "MatCreate_MPIAIJ"
5532 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5533 {
5534   Mat_MPIAIJ     *b;
5535   PetscErrorCode ierr;
5536   PetscMPIInt    size;
5537 
5538   PetscFunctionBegin;
5539   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5540   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5541   B->data         = (void*)b;
5542   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5543   B->assembled    = PETSC_FALSE;
5544   B->insertmode   = NOT_SET_VALUES;
5545   b->size         = size;
5546   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5547 
5548   /* build cache for off-process entries formed during assembly */
5549   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5550   b->donotstash  = PETSC_FALSE;
5551   b->colmap      = 0;
5552   b->garray      = 0;
5553   b->roworiented = PETSC_TRUE;
5554 
5555   /* stuff used for matrix vector multiply */
5556   b->lvec      = PETSC_NULL;
5557   b->Mvctx     = PETSC_NULL;
5558 
5559   /* stuff for MatGetRow() */
5560   b->rowindices   = 0;
5561   b->rowvalues    = 0;
5562   b->getrowactive = PETSC_FALSE;
5563 
5564   /* flexible pointer used in CUSP/CUSPARSE classes */
5565   b->spptr        = PETSC_NULL;
5566 #if defined(PETSC_HAVE_SPOOLES)
5567   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5568                                      "MatGetFactor_mpiaij_spooles",
5569                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5570 #endif
5571 #if defined(PETSC_HAVE_MUMPS)
5572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5573                                      "MatGetFactor_aij_mumps",
5574                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5575 #endif
5576 #if defined(PETSC_HAVE_PASTIX)
5577   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5578                                      "MatGetFactor_mpiaij_pastix",
5579                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5580 #endif
5581 #if defined(PETSC_HAVE_SUPERLU_DIST)
5582   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5583                                      "MatGetFactor_mpiaij_superlu_dist",
5584                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5585 #endif
5586 #if defined(PETSC_HAVE_CLIQUE)
5587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5588                                      "MatGetFactor_aij_clique",
5589                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5590 #endif
5591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5592                                      "MatStoreValues_MPIAIJ",
5593                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5595                                      "MatRetrieveValues_MPIAIJ",
5596                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5598                                      "MatGetDiagonalBlock_MPIAIJ",
5599                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5601                                      "MatIsTranspose_MPIAIJ",
5602                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5604                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5605                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5607                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5608                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5610                                      "MatDiagonalScaleLocal_MPIAIJ",
5611                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5613                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5614                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5616                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5617                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5619                                      "MatConvert_MPIAIJ_MPISBAIJ",
5620                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5622                                      "MatMatMult_MPIDense_MPIAIJ",
5623                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5625                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5626                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5628                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5629                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5630   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5631   PetscFunctionReturn(0);
5632 }
5633 EXTERN_C_END
5634 
5635 #undef __FUNCT__
5636 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5637 /*@
5638      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5639          and "off-diagonal" part of the matrix in CSR format.
5640 
5641    Collective on MPI_Comm
5642 
5643    Input Parameters:
5644 +  comm - MPI communicator
5645 .  m - number of local rows (cannot be PETSC_DECIDE)
5646 .  n - This value should be the same as the local size used in creating the
5647        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5648        it calculated if N is given) For square matrices n is almost always m.
5649 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5650 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5651 .  i - row indices for "diagonal" portion of matrix
5652 .  j - column indices for "diagonal" portion of matrix
5653 .  a - matrix values for "diagonal" portion of matrix
5654 .  oi - row indices for "off-diagonal" portion of matrix
5655 .  oj - column indices for "off-diagonal" portion of matrix
5656 -  oa - matrix values for "off-diagonal" portion of matrix
5657 
5658    Output Parameter:
5659 .   mat - the matrix
5660 
5661    Level: advanced
5662 
5663    Notes:
5664        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5665        must free the arrays once the matrix has been destroyed and not before.
5666 
5667        The i and j indices are 0 based
5668 
5669        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5670 
5671        This sets local rows and cannot be used to set off-processor values.
5672 
5673        You cannot later use MatSetValues() to change values in this matrix.
5674 
5675 .keywords: matrix, aij, compressed row, sparse, parallel
5676 
5677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5678           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5679 @*/
5680 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5681 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5682 {
5683   PetscErrorCode ierr;
5684   Mat_MPIAIJ     *maij;
5685 
5686   PetscFunctionBegin;
5687   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5688   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5689   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5690   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5691   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5692   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5693   maij = (Mat_MPIAIJ*) (*mat)->data;
5694   maij->donotstash     = PETSC_TRUE;
5695   (*mat)->preallocated = PETSC_TRUE;
5696 
5697   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5698   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5699 
5700   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5701   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5702 
5703   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5705   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5706   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5707 
5708   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5709   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5710   PetscFunctionReturn(0);
5711 }
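
/*
   Editorial usage sketch (not part of PETSc): a thin wrapper showing a typical call to
   MatCreateMPIAIJWithSplitArrays().  The array arguments are caller-owned and are NOT
   copied, so they must stay valid until the matrix is destroyed.  The guard macro and
   function name are hypothetical.
*/
#if defined(PETSC_EDITORIAL_EXAMPLES)
static PetscErrorCode exampleSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,
                                         PetscInt di[],PetscInt dj[],PetscScalar da[],
                                         PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* M and N are computed from the local sizes; see MatCreateAIJ() for how the
     "diagonal" and "off-diagonal" CSR pieces are defined */
  ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                        di,dj,da,oi,oj,oa,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif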
5712 
5713 /*
5714     Special version for direct calls from Fortran
5715 */
5716 #include <petsc-private/fortranimpl.h>
5717 
5718 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5719 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5720 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5721 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5722 #endif
5723 
5724 /* Change these macros so they can be used in a void function */
5725 #undef CHKERRQ
5726 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5727 #undef SETERRQ2
5728 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5729 #undef SETERRQ3
5730 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5731 #undef SETERRQ
5732 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5733 
5734 EXTERN_C_BEGIN
5735 #undef __FUNCT__
5736 #define __FUNCT__ "matsetvaluesmpiaij_"
5737 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5738 {
5739   Mat             mat = *mmat;
5740   PetscInt        m = *mm, n = *mn;
5741   InsertMode      addv = *maddv;
5742   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5743   PetscScalar     value;
5744   PetscErrorCode  ierr;
5745 
5746   MatCheckPreallocated(mat,1);
5747   if (mat->insertmode == NOT_SET_VALUES) {
5748     mat->insertmode = addv;
5749   }
5750 #if defined(PETSC_USE_DEBUG)
5751   else if (mat->insertmode != addv) {
5752     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5753   }
5754 #endif
5755   {
5756   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5757   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5758   PetscBool       roworiented = aij->roworiented;
5759 
5760   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros below */
5761   Mat             A = aij->A;
5762   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5763   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5764   MatScalar       *aa = a->a;
5765   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5766   Mat             B = aij->B;
5767   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5768   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5769   MatScalar       *ba = b->a;
5770 
5771   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5772   PetscInt        nonew = a->nonew;
5773   MatScalar       *ap1,*ap2;
5774 
5775   PetscFunctionBegin;
5776   for (i=0; i<m; i++) {
5777     if (im[i] < 0) continue;
5778 #if defined(PETSC_USE_DEBUG)
5779     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5780 #endif
5781     if (im[i] >= rstart && im[i] < rend) {
5782       row      = im[i] - rstart;
5783       lastcol1 = -1;
5784       rp1      = aj + ai[row];
5785       ap1      = aa + ai[row];
5786       rmax1    = aimax[row];
5787       nrow1    = ailen[row];
5788       low1     = 0;
5789       high1    = nrow1;
5790       lastcol2 = -1;
5791       rp2      = bj + bi[row];
5792       ap2      = ba + bi[row];
5793       rmax2    = bimax[row];
5794       nrow2    = bilen[row];
5795       low2     = 0;
5796       high2    = nrow2;
5797 
5798       for (j=0; j<n; j++) {
5799         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5800         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5801         if (in[j] >= cstart && in[j] < cend){
5802           col = in[j] - cstart;
5803           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5804         } else if (in[j] < 0) continue;
5805 #if defined(PETSC_USE_DEBUG)
5806         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5807 #endif
5808         else {
5809           if (mat->was_assembled) {
5810             if (!aij->colmap) {
5811               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5812             }
5813 #if defined (PETSC_USE_CTABLE)
5814             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5815 	    col--;
5816 #else
5817             col = aij->colmap[in[j]] - 1;
5818 #endif
5819             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5820               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5821               col =  in[j];
5822               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5823               B = aij->B;
5824               b = (Mat_SeqAIJ*)B->data;
5825               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5826               rp2      = bj + bi[row];
5827               ap2      = ba + bi[row];
5828               rmax2    = bimax[row];
5829               nrow2    = bilen[row];
5830               low2     = 0;
5831               high2    = nrow2;
5832               bm       = aij->B->rmap->n;
5833               ba = b->a;
5834             }
5835           } else col = in[j];
5836           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5837         }
5838       }
5839     } else {
5840       if (!aij->donotstash) {
5841         if (roworiented) {
5842           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5843         } else {
5844           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5845         }
5846       }
5847     }
5848   }}
5849   PetscFunctionReturnVoid();
5850 }
5851 EXTERN_C_END
5852 
5853