1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity; see the usage sketch after this block.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17  Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
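/*
   A minimal usage sketch, not part of the library source: comm, m, n, M, N, d_nz and o_nz are
   assumed to be supplied by the caller.  It illustrates the recommendation above of calling both
   preallocation routines so the same code works with one or many processes.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/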
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
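/*
   A short sketch, assuming comm, m, n, M and N are set by the caller: with MatSetFromOptions()
   in place, running the program with -mat_type aijcrl selects this type.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/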
41 
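/*
   MatFindNonZeroRows_MPIAIJ - returns in keptrows the global indices of the local rows that contain
   at least one nonzero value; keptrows is left at NULL when no process owns a completely zero row.
*/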
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
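/*
   A hedged calling sketch (comm, the local row count m and the rank-0 SeqAIJ matrix gseq are
   assumed to be set up by the caller):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... later, to move fresh numerical values from process 0 into the same nonzero pattern ...
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/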
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the number of diagonal and off-diagonal nonzeros in each row */
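      /* ld[i] records how many entries of row i fall to the left of this process's diagonal block; it is stored in the Mat_MPIAIJ structure below so that the MAT_REUSE_MATRIX branch can split incoming values between the diagonal (A) and off-diagonal (B) parts */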
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the number of diagonal and off-diagonal nonzeros in each row */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0 */
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it, it is not scalable (each process
331 has an order-N integer array) but is fast to access.
332 */
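/*
   A sketch of how the map is consumed elsewhere in this file (see MatSetValues_MPIAIJ): gcol is a
   hypothetical global column index owned by another process, and col becomes the local column index
   into the off-diagonal block B, or a negative value when gcol is not present there.

     PetscInt col;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/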
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
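/* The two macros below insert or add a single (row,col,value) pair into the local diagonal block (MatSetValues_SeqAIJ_A_Private) or the off-diagonal block (MatSetValues_SeqAIJ_B_Private): a short binary search plus a linear scan locates the column within the row, an existing entry is updated in place, and otherwise the row is grown through MatSeqXAIJReallocateAIJ() with later entries shifted up to make room */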
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
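  /* v is assumed to hold the entire row in ascending global column order: the off-diagonal entries to the left of the diagonal block (l of them, computed below), then the diagonal-block entries, then the remaining off-diagonal entries to the right */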
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mixture */
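  /* the bitwise OR reduction works because INSERT_VALUES and ADD_VALUES are distinct values (1 and 2) and NOT_SET_VALUES is 0, so the result equals ADD_VALUES|INSERT_VALUES exactly when different processes used different modes */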
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselves, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal entry,
843      which would allocate new memory, calling lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
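  /* strategy: scatter a 0/1 mask of the zeroed rows into lmask so each process knows which of its ghost columns correspond to globally zeroed rows; the zeroed local rows of B are wiped directly, and entries in masked columns are zeroed after their contribution a_ij*x_j is subtracted from b (when x and b are provided) */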
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added in yy until the next line, */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: the diagonal blocks must be transposes of each other */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302   FILE              *file;
1303 
1304   PetscFunctionBegin;
1305   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1307   nz   = A->nz + B->nz;
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) {
1319       rlen = PetscMax(rlen,range[i+1] - range[i]);
1320     }
1321   } else {
1322     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1323     rlen = mat->rmap->n;
1324   }
1325 
1326   /* load up the local row counts */
1327   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1328   for (i=0; i<mat->rmap->n; i++) {
1329     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1330   }
1331 
1332   /* store the row lengths to the file */
1333   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1334   if (!rank) {
1335     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1338       rlen = range[i+1] - range[i];
1339       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1345     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1349 
1350   /* load up the local column indices */
1351   nzmax = nz; /* process 0 needs as much space as the largest process needs */
1352   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1353   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1354   cnt  = 0;
1355   for (i=0; i<mat->rmap->n; i++) {
1356     for (j=B->i[i]; j<B->i[i+1]; j++) {
1357       if ( (col = garray[B->j[j]]) > cstart) break;
1358       column_indices[cnt++] = col;
1359     }
1360     for (k=A->i[i]; k<A->i[i+1]; k++) {
1361       column_indices[cnt++] = A->j[k] + cstart;
1362     }
1363     for (; j<B->i[i+1]; j++) {
1364       column_indices[cnt++] = garray[B->j[j]];
1365     }
1366   }
1367   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368 
1369   /* store the column indices to the file */
1370   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1371   if (!rank) {
1372     MPI_Status status;
1373     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     for (i=1; i<size; i++) {
1375       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1376       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1377       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1378       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1382   } else {
1383     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1384     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1386     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1387   }
1388   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1389 
1390   /* load up the local column values */
1391   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1392   cnt  = 0;
1393   for (i=0; i<mat->rmap->n; i++) {
1394     for (j=B->i[i]; j<B->i[i+1]; j++) {
1395       if ( garray[B->j[j]] > cstart) break;
1396       column_values[cnt++] = B->a[j];
1397     }
1398     for (k=A->i[i]; k<A->i[i+1]; k++) {
1399       column_values[cnt++] = A->a[k];
1400     }
1401     for (; j<B->i[i+1]; j++) {
1402       column_values[cnt++] = B->a[j];
1403     }
1404   }
1405   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406 
1407   /* store the column values to the file */
1408   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1409   if (!rank) {
1410     MPI_Status status;
1411     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     for (i=1; i<size; i++) {
1413       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1414       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1415       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1416       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1420   } else {
1421     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1422     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1425   }
1426   ierr = PetscFree(column_values);CHKERRQ(ierr);
1427 
1428   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1429   if (file) {
1430     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
1435 
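/*
   MatView_MPIAIJ_ASCIIorDraworSocket - handles ASCII, draw, socket, and binary viewers for
   the parallel AIJ format.  The ASCII info formats only print per-process nonzero statistics;
   binary output is delegated to MatView_MPIAIJ_Binary().  For an actual ASCII, draw, or
   socket dump with more than one process the whole matrix is first assembled onto process 0
   as a temporary MATMPIAIJ (refused beyond 1024 rows unless -mat_ascii_output_large is given)
   and then viewed through the singleton viewer.
*/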
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1439 {
1440   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1441   PetscErrorCode    ierr;
1442   PetscMPIInt       rank = aij->rank,size = aij->size;
1443   PetscBool         isdraw,iascii,isbinary;
1444   PetscViewer       sviewer;
1445   PetscViewerFormat format;
1446 
1447   PetscFunctionBegin;
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1451   if (iascii) {
1452     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1453     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1454       MatInfo    info;
1455       PetscBool  inodes;
1456 
1457       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1458       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1461       if (!inodes) {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1463                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1464       } else {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1466                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       }
1468       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1472       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1475       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1478       PetscInt   inodecount,inodelimit,*inodes;
1479       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1480       if (inodes) {
1481         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1482       } else {
1483         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1484       }
1485       PetscFunctionReturn(0);
1486     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1487       PetscFunctionReturn(0);
1488     }
1489   } else if (isbinary) {
1490     if (size == 1) {
1491       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1492       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1493     } else {
1494       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1495     }
1496     PetscFunctionReturn(0);
1497   } else if (isdraw) {
1498     PetscDraw  draw;
1499     PetscBool  isnull;
1500     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1501     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1502   }
1503 
1504   if (size == 1) {
1505     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507   } else {
1508     /* assemble the entire matrix onto first processor. */
1509     Mat         A;
1510     Mat_SeqAIJ  *Aloc;
1511     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1512     MatScalar   *a;
1513 
1514     if (mat->rmap->N > 1024) {
1515       PetscBool  flg = PETSC_FALSE;
1516 
1517       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1518       if (!flg) {
1519         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1520       }
1521     }
1522 
1523     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1524     if (!rank) {
1525       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1526     } else {
1527       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1528     }
1529     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1530     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1531     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1532     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1533     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1534 
1535     /* copy over the A part */
1536     Aloc = (Mat_SeqAIJ*)aij->A->data;
1537     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row = mat->rmap->rstart;
1539     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1543     }
1544     aj = Aloc->j;
1545     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1546 
1547     /* copy over the B part */
1548     Aloc = (Mat_SeqAIJ*)aij->B->data;
1549     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550     row  = mat->rmap->rstart;
1551     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552     ct   = cols;
1553     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1554     for (i=0; i<m; i++) {
1555       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1556       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1557     }
1558     ierr = PetscFree(ct);CHKERRQ(ierr);
1559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     /*
1562        Everyone has to call to draw the matrix since the graphics waits are
1563        synchronized across all processors that share the PetscDraw object
1564     */
1565     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     if (!rank) {
1567       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1568       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1569       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1570       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1571     }
1572     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     ierr = MatDestroy(&A);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatView_MPIAIJ"
1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1581 {
1582   PetscErrorCode ierr;
1583   PetscBool      iascii,isdraw,issocket,isbinary;
1584 
1585   PetscFunctionBegin;
1586   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1587   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1590   if (iascii || isdraw || isbinary || issocket) {
1591     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
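/*
   MatSOR_MPIAIJ - only the "local" SOR variants are supported in parallel.  Each outer
   iteration scatters the current solution into mat->lvec and forms the modified right-hand
   side

        bb1 = bb - B*lvec

   (B is the off-diagonal block, lvec holds the ghosted x values), then applies the requested
   sequential sweep to the diagonal block A.  The result is processor-block relaxation:
   block Jacobi between processes with SOR inside each process.  The SOR_EISENSTAT branch
   applies the Eisenstat trick built from the same local pieces.
*/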
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
1608   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1610   }
1611 
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   }  else if (flag & SOR_EISENSTAT) {
1667     Vec         xx1;
1668 
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
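/*
   MatPermute_MPIAIJ - the permuted matrix is obtained as a submatrix of A.  The row
   permutation is gathered onto every process and inverted, so each process can extract the
   slice of target rows corresponding to its ownership range; the column permutation only
   needs a locally inverted copy.  The actual data movement is delegated to
   MatGetSubMatrix_MPIAIJ_Private().
*/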
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   MPI_Comm       comm;
1703   PetscInt       first,local_rowsize,local_colsize;
1704   const PetscInt *rows;
1705   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1710   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1711   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1712   /* collect the global row permutation and invert it */
1713   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1714   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1715   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1718   /* get the local target indices */
1719   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1721   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1723   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1724   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1725   /* the column permutation is so much easier;
1726      make a local version of 'colp' and invert it */
1727   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1728   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1729   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1731   /* now we just get the submatrix */
1732   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1733   /* clean up */
1734   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1735   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
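/*
   MatGetInfo_MPIAIJ - the statistics of the diagonal and off-diagonal blocks are summed
   locally; MAT_LOCAL returns that sum directly, while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
   reduce it over the communicator with MPIU_MAX and MPIU_SUM respectively.  No factorization
   information is reported since there is no parallel LU/ILU/Cholesky for this format.
*/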
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscReal      isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size     = 1.0;
1750   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1755   isend[3] += info->memory;  isend[4] += info->mallocs;
1756   if (flag == MAT_LOCAL) {
1757     info->nz_used      = isend[0];
1758     info->nz_allocated = isend[1];
1759     info->nz_unneeded  = isend[2];
1760     info->memory       = isend[3];
1761     info->mallocs      = isend[4];
1762   } else if (flag == MAT_GLOBAL_MAX) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   } else if (flag == MAT_GLOBAL_SUM) {
1770     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   }
1777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1778   info->fill_ratio_needed = 0;
1779   info->factor_mallocs    = 0;
1780 
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetOption_MPIAIJ"
1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1787 {
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   switch (op) {
1793   case MAT_NEW_NONZERO_LOCATIONS:
1794   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1795   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1796   case MAT_KEEP_NONZERO_PATTERN:
1797   case MAT_NEW_NONZERO_LOCATION_ERR:
1798   case MAT_USE_INODES:
1799   case MAT_IGNORE_ZERO_ENTRIES:
1800     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1801     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1802     break;
1803   case MAT_ROW_ORIENTED:
1804     a->roworiented = flg;
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_NEW_DIAGONALS:
1809     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1810     break;
1811   case MAT_IGNORE_OFF_PROC_ENTRIES:
1812     a->donotstash = flg;
1813     break;
1814   case MAT_SPD:
1815     A->spd_set                         = PETSC_TRUE;
1816     A->spd                             = flg;
1817     if (flg) {
1818       A->symmetric                     = PETSC_TRUE;
1819       A->structurally_symmetric        = PETSC_TRUE;
1820       A->symmetric_set                 = PETSC_TRUE;
1821       A->structurally_symmetric_set    = PETSC_TRUE;
1822     }
1823     break;
1824   case MAT_SYMMETRIC:
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1829     break;
1830   case MAT_HERMITIAN:
1831     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1832     break;
1833   case MAT_SYMMETRY_ETERNAL:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   default:
1837     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1838   }
1839   PetscFunctionReturn(0);
1840 }
1841 
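/*
   MatGetRow_MPIAIJ - merges one row of the diagonal block A (local column indices shifted by
   cstart) with the matching row of the off-diagonal block B (local indices mapped through
   garray to global columns).  Assuming, as elsewhere in this file, that both blocks store
   their columns in increasing order, the merge emits the B entries that lie left of the
   diagonal block, then all A entries, then the remaining B entries, so the caller sees the
   row in increasing global column order.  Only locally owned rows may be requested.
*/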
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatGetRow_MPIAIJ"
1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1845 {
1846   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1847   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1848   PetscErrorCode ierr;
1849   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1850   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1851   PetscInt       *cmap,*idx_p;
1852 
1853   PetscFunctionBegin;
1854   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1855   mat->getrowactive = PETSC_TRUE;
1856 
1857   if (!mat->rowvalues && (idx || v)) {
1858     /*
1859         allocate enough space to hold information from the longest row.
1860     */
1861     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1862     PetscInt   max = 1,tmp;
1863     for (i=0; i<matin->rmap->n; i++) {
1864       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1865       if (max < tmp) { max = tmp; }
1866     }
1867     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1868   }
1869 
1870   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1871   lrow = row - rstart;
1872 
1873   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1874   if (!v)   {pvA = 0; pvB = 0;}
1875   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1876   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1877   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1878   nztot = nzA + nzB;
1879 
1880   cmap  = mat->garray;
1881   if (v  || idx) {
1882     if (nztot) {
1883       /* Sort by increasing column numbers, assuming A and B already sorted */
1884       PetscInt imark = -1;
1885       if (v) {
1886         *v = v_p = mat->rowvalues;
1887         for (i=0; i<nzB; i++) {
1888           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1889           else break;
1890         }
1891         imark = i;
1892         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1893         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1894       }
1895       if (idx) {
1896         *idx = idx_p = mat->rowindices;
1897         if (imark > -1) {
1898           for (i=0; i<imark; i++) {
1899             idx_p[i] = cmap[cworkB[i]];
1900           }
1901         } else {
1902           for (i=0; i<nzB; i++) {
1903             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1904             else break;
1905           }
1906           imark = i;
1907         }
1908         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1909         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1910       }
1911     } else {
1912       if (idx) *idx = 0;
1913       if (v)   *v   = 0;
1914     }
1915   }
1916   *nz = nztot;
1917   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1918   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1924 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1925 {
1926   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1927 
1928   PetscFunctionBegin;
1929   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1930   aij->getrowactive = PETSC_FALSE;
1931   PetscFunctionReturn(0);
1932 }
1933 
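/*
   MatNorm_MPIAIJ - each norm is computed from the two local blocks plus one reduction:

      NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 ),  local squared sums combined with MPIU_SUM
      NORM_1        :  max_j sum_i |a_ij|,       column sums accumulated in a length-N array,
                                                 combined with MPIU_SUM, then maximized
      NORM_INFINITY :  max_i sum_j |a_ij|,       local row-sum maxima combined with MPIU_MAX

   The two norm is not supported for this format.
*/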
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatNorm_MPIAIJ"
1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1937 {
1938   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1939   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1940   PetscErrorCode ierr;
1941   PetscInt       i,j,cstart = mat->cmap->rstart;
1942   PetscReal      sum = 0.0;
1943   MatScalar      *v;
1944 
1945   PetscFunctionBegin;
1946   if (aij->size == 1) {
1947     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1948   } else {
1949     if (type == NORM_FROBENIUS) {
1950       v = amat->a;
1951       for (i=0; i<amat->nz; i++) {
1952 #if defined(PETSC_USE_COMPLEX)
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954 #else
1955         sum += (*v)*(*v); v++;
1956 #endif
1957       }
1958       v = bmat->a;
1959       for (i=0; i<bmat->nz; i++) {
1960 #if defined(PETSC_USE_COMPLEX)
1961         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1962 #else
1963         sum += (*v)*(*v); v++;
1964 #endif
1965       }
1966       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1967       *norm = PetscSqrtReal(*norm);
1968     } else if (type == NORM_1) { /* max column norm */
1969       PetscReal *tmp,*tmp2;
1970       PetscInt  *jj,*garray = aij->garray;
1971       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1972       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1973       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1974       *norm = 0.0;
1975       v = amat->a; jj = amat->j;
1976       for (j=0; j<amat->nz; j++) {
1977         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1978       }
1979       v = bmat->a; jj = bmat->j;
1980       for (j=0; j<bmat->nz; j++) {
1981         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1982       }
1983       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1984       for (j=0; j<mat->cmap->N; j++) {
1985         if (tmp2[j] > *norm) *norm = tmp2[j];
1986       }
1987       ierr = PetscFree(tmp);CHKERRQ(ierr);
1988       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1989     } else if (type == NORM_INFINITY) { /* max row norm */
1990       PetscReal ntemp = 0.0;
1991       for (j=0; j<aij->A->rmap->n; j++) {
1992         v = amat->a + amat->i[j];
1993         sum = 0.0;
1994         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1995           sum += PetscAbsScalar(*v); v++;
1996         }
1997         v = bmat->a + bmat->i[j];
1998         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1999           sum += PetscAbsScalar(*v); v++;
2000         }
2001         if (sum > ntemp) ntemp = sum;
2002       }
2003       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2004     } else {
2005       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2006     }
2007   }
2008   PetscFunctionReturn(0);
2009 }
2010 
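/*
   MatTranspose_MPIAIJ - the transpose is built by a second assembly rather than an explicit
   communication phase.  For MAT_INITIAL_MATRIX the result is preallocated from the column
   counts of the local diagonal block (d_nnz; the off-diagonal counts are only approximated by
   the same array, which is why MAT_NEW_NONZERO_ALLOCATION_ERR is turned off), then every
   local row of A and of B is inserted as a column of the result with MatSetValues() using
   global indices, and MatAssemblyBegin/End() moves the off-process entries.
*/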
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatTranspose_MPIAIJ"
2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2014 {
2015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2016   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2017   PetscErrorCode ierr;
2018   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2019   PetscInt       cstart=A->cmap->rstart,ncol;
2020   Mat            B;
2021   MatScalar      *array;
2022 
2023   PetscFunctionBegin;
2024   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2025 
2026   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2027   ai = Aloc->i; aj = Aloc->j;
2028   bi = Bloc->i; bj = Bloc->j;
2029   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2030     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2031     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2032     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2033     for (i=0; i<ai[ma]; i++){
2034       d_nnz[aj[i]] ++;
2035       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2036     }
2037 
2038     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2039     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2040     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2041     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2042     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2044     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2045   } else {
2046     B = *matout;
2047   }
2048 
2049   /* copy over the A part */
2050   array = Aloc->a;
2051   row = A->rmap->rstart;
2052   for (i=0; i<ma; i++) {
2053     ncol = ai[i+1]-ai[i];
2054     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2055     row++; array += ncol; aj += ncol;
2056   }
2057   aj = Aloc->j;
2058   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2059 
2060   /* copy over the B part */
2061   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2062   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2063   array = Bloc->a;
2064   row = A->rmap->rstart;
2065   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2066   cols_tmp = cols;
2067   for (i=0; i<mb; i++) {
2068     ncol = bi[i+1]-bi[i];
2069     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2070     row++; array += ncol; cols_tmp += ncol;
2071   }
2072   ierr = PetscFree(cols);CHKERRQ(ierr);
2073 
2074   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2077     *matout = B;
2078   } else {
2079     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
2083 
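/*
   MatDiagonalScale_MPIAIJ - computes mat = diag(ll)*mat*diag(rr), i.e. entry (i,j) becomes
   ll[i]*a_ij*rr[j].  The left vector is distributed like the rows, so both blocks can be
   scaled locally; the right vector must first be scattered into aij->lvec to obtain the ghost
   values needed by the off-diagonal block, and that scatter is overlapped with scaling the
   diagonal block.
*/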
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2086 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2087 {
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            a = aij->A,b = aij->B;
2090   PetscErrorCode ierr;
2091   PetscInt       s1,s2,s3;
2092 
2093   PetscFunctionBegin;
2094   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2095   if (rr) {
2096     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2097     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2098     /* Overlap communication with computation. */
2099     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2100   }
2101   if (ll) {
2102     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2103     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2104     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2105   }
2106   /* scale  the diagonal block */
2107   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2108 
2109   if (rr) {
2110     /* Do a scatter end and then right scale the off-diagonal block */
2111     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2112     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2113   }
2114 
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2121 {
2122   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2123   PetscErrorCode ierr;
2124 
2125   PetscFunctionBegin;
2126   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatEqual_MPIAIJ"
2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2133 {
2134   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2135   Mat            a,b,c,d;
2136   PetscBool      flg;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   a = matA->A; b = matA->B;
2141   c = matB->A; d = matB->B;
2142 
2143   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2146   }
2147   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatCopy_MPIAIJ"
2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2154 {
2155   PetscErrorCode ierr;
2156   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2157   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2158 
2159   PetscFunctionBegin;
2160   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2161   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2162     /* because of the column compression in the off-processor part of the matrix a->B,
2163        the number of columns in a->B and b->B may be different, hence we cannot call
2164        the MatCopy() directly on the two parts. If need be, we can provide a more
2165        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2166        then copying the submatrices */
2167     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2168   } else {
2169     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2170     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatSetUp_MPIAIJ"
2177 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2178 {
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2188 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2189 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2190 {
2191   PetscInt          i,m=Y->rmap->N;
2192   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2193   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2194   const PetscInt    *xi = x->i,*yi = y->i;
2195 
2196   PetscFunctionBegin;
2197   /* Set the number of nonzeros in the new matrix */
2198   for(i=0; i<m; i++) {
2199     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2200     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2201     nnz[i] = 0;
2202     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2203       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2204       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2205       nnz[i]++;
2206     }
2207     for (; k<nzy; k++) nnz[i]++;
2208   }
2209   PetscFunctionReturn(0);
2210 }
2211 
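/*
   MatAXPY_MPIAIJ - Y = Y + a*X is handled per the sparsity relationship given in 'str':
   SAME_NONZERO_PATTERN applies a single BLAS axpy to the stored values of each block;
   SUBSET_NONZERO_PATTERN uses MatAXPY_SeqAIJ on the diagonal blocks and scatters the
   off-diagonal values of X through a cached x-to-y index map; otherwise a new matrix with the
   merged nonzero pattern is preallocated (the helper above is needed for the off-diagonal
   blocks because their local column numberings differ) and Y is replaced by it.
*/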
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatAXPY_MPIAIJ"
2214 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2215 {
2216   PetscErrorCode ierr;
2217   PetscInt       i;
2218   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2219   PetscBLASInt   bnz,one=1;
2220   Mat_SeqAIJ     *x,*y;
2221 
2222   PetscFunctionBegin;
2223   if (str == SAME_NONZERO_PATTERN) {
2224     PetscScalar alpha = a;
2225     x = (Mat_SeqAIJ *)xx->A->data;
2226     y = (Mat_SeqAIJ *)yy->A->data;
2227     bnz = PetscBLASIntCast(x->nz);
2228     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2229     x = (Mat_SeqAIJ *)xx->B->data;
2230     y = (Mat_SeqAIJ *)yy->B->data;
2231     bnz = PetscBLASIntCast(x->nz);
2232     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2233   } else if (str == SUBSET_NONZERO_PATTERN) {
2234     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2235 
2236     x = (Mat_SeqAIJ *)xx->B->data;
2237     y = (Mat_SeqAIJ *)yy->B->data;
2238     if (y->xtoy && y->XtoY != xx->B) {
2239       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2240       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2241     }
2242     if (!y->xtoy) { /* get xtoy */
2243       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2244       y->XtoY = xx->B;
2245       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2246     }
2247     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2248   } else {
2249     Mat B;
2250     PetscInt *nnz_d,*nnz_o;
2251     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2252     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2253     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2254     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2255     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2256     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2257     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2258     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2259     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2260     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2261     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2262     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2263     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2264     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatConjugate_MPIAIJ"
2273 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2274 {
2275 #if defined(PETSC_USE_COMPLEX)
2276   PetscErrorCode ierr;
2277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2281   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2282 #else
2283   PetscFunctionBegin;
2284 #endif
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatRealPart_MPIAIJ"
2290 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2291 {
2292   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2297   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2303 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2304 {
2305   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2306   PetscErrorCode ierr;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2310   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #ifdef PETSC_HAVE_PBGL
2315 
2316 #include <boost/parallel/mpi/bsp_process_group.hpp>
2317 #include <boost/graph/distributed/ilu_default_graph.hpp>
2318 #include <boost/graph/distributed/ilu_0_block.hpp>
2319 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2320 #include <boost/graph/distributed/petsc/interface.hpp>
2321 #include <boost/multi_array.hpp>
2322 #include <boost/parallel/distributed_property_map.hpp>
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2326 /*
2327   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2328 */
2329 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2330 {
2331   namespace petsc = boost::distributed::petsc;
2332 
2333   namespace graph_dist = boost::graph::distributed;
2334   using boost::graph::distributed::ilu_default::process_group_type;
2335   using boost::graph::ilu_permuted;
2336 
2337   PetscBool       row_identity, col_identity;
2338   PetscContainer  c;
2339   PetscInt        m, n, M, N;
2340   PetscErrorCode  ierr;
2341 
2342   PetscFunctionBegin;
2343   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2344   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2345   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2346   if (!row_identity || !col_identity) {
2347     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2348   }
2349 
2350   process_group_type pg;
2351   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2352   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2353   lgraph_type&   level_graph = *lgraph_p;
2354   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2355 
2356   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2357   ilu_permuted(level_graph);
2358 
2359   /* put together the new matrix */
2360   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2361   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2362   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2363   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2364   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2365   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2366   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2367   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2368 
2369   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2370   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2371   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2372   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2378 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2379 {
2380   PetscFunctionBegin;
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatSolve_MPIAIJ"
2386 /*
2387   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2388 */
2389 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2390 {
2391   namespace graph_dist = boost::graph::distributed;
2392 
2393   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2394   lgraph_type*   lgraph_p;
2395   PetscContainer c;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2400   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2401   ierr = VecCopy(b, x);CHKERRQ(ierr);
2402 
2403   PetscScalar* array_x;
2404   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2405   PetscInt sx;
2406   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2407 
2408   PetscScalar* array_b;
2409   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2410   PetscInt sb;
2411   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2412 
2413   lgraph_type&   level_graph = *lgraph_p;
2414   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2415 
2416   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2417   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2418                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2419 
2420   typedef boost::iterator_property_map<array_ref_type::iterator,
2421                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2422   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2423                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2424 
2425   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2426 
2427   PetscFunctionReturn(0);
2428 }
2429 #endif
2430 
2431 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2432   PetscInt       nzlocal,nsends,nrecvs;
2433   PetscMPIInt    *send_rank,*recv_rank;
2434   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2435   PetscScalar    *sbuf_a,**rbuf_a;
2436   PetscErrorCode (*Destroy)(Mat);
2437 } Mat_Redundant;
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2441 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2442 {
2443   PetscErrorCode       ierr;
2444   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2445   PetscInt             i;
2446 
2447   PetscFunctionBegin;
2448   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2449   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2450   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2451   for (i=0; i<redund->nrecvs; i++){
2452     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2453     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2454   }
2455   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2456   ierr = PetscFree(redund);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatDestroy_MatRedundant"
2462 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2463 {
2464   PetscErrorCode  ierr;
2465   PetscContainer  container;
2466   Mat_Redundant   *redund=PETSC_NULL;
2467 
2468   PetscFunctionBegin;
2469   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2470   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2471   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2472   A->ops->destroy = redund->Destroy;
2473   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2474   if (A->ops->destroy) {
2475     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2476   }
2477   PetscFunctionReturn(0);
2478 }
2479 
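/*
   MatGetRedundantMatrix_MPIAIJ - each process packs its local rows in a CSR-like layout
   (row pointers followed by column indices in sbuf_j, values in sbuf_a) and exchanges the
   buffers with the processes of the other sub-communicators that cover the same global rows,
   so that every sub-communicator can assemble a complete copy of the matrix.  For
   MAT_REUSE_MATRIX the ranks and buffers are recovered from the Mat_Redundant container
   attached to the previously created matrix and only the numerical values are exchanged
   again.
*/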
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2482 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2483 {
2484   PetscMPIInt    rank,size;
2485   MPI_Comm       comm=((PetscObject)mat)->comm;
2486   PetscErrorCode ierr;
2487   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2488   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2489   PetscInt       *rowrange=mat->rmap->range;
2490   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2491   Mat            A=aij->A,B=aij->B,C=*matredundant;
2492   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2493   PetscScalar    *sbuf_a;
2494   PetscInt       nzlocal=a->nz+b->nz;
2495   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2496   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2497   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2498   MatScalar      *aworkA,*aworkB;
2499   PetscScalar    *vals;
2500   PetscMPIInt    tag1,tag2,tag3,imdex;
2501   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2502                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2503   MPI_Status     recv_status,*send_status;
2504   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2505   PetscInt       **rbuf_j=PETSC_NULL;
2506   PetscScalar    **rbuf_a=PETSC_NULL;
2507   Mat_Redundant  *redund=PETSC_NULL;
2508   PetscContainer container;
2509 
2510   PetscFunctionBegin;
2511   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2512   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2513 
2514   if (reuse == MAT_REUSE_MATRIX) {
2515     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2516     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2517     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2518     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2519     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2520     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2521     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2522     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2523 
2524     nsends    = redund->nsends;
2525     nrecvs    = redund->nrecvs;
2526     send_rank = redund->send_rank;
2527     recv_rank = redund->recv_rank;
2528     sbuf_nz   = redund->sbuf_nz;
2529     rbuf_nz   = redund->rbuf_nz;
2530     sbuf_j    = redund->sbuf_j;
2531     sbuf_a    = redund->sbuf_a;
2532     rbuf_j    = redund->rbuf_j;
2533     rbuf_a    = redund->rbuf_a;
2534   }
2535 
2536   if (reuse == MAT_INITIAL_MATRIX){
2537     PetscMPIInt  subrank,subsize;
2538     PetscInt     nleftover,np_subcomm;
2539     /* get the destination processors' id send_rank, nsends and nrecvs */
2540     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2541     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2542     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2543     np_subcomm = size/nsubcomm;
2544     nleftover  = size - nsubcomm*np_subcomm;
2545     nsends = 0; nrecvs = 0;
2546     for (i=0; i<size; i++){ /* i=rank*/
2547       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2548         send_rank[nsends] = i; nsends++;
2549         recv_rank[nrecvs++] = i;
2550       }
2551     }
2552     if (rank >= size - nleftover){/* this proc is a leftover processor */
2553       i = size-nleftover-1;
2554       j = 0;
2555       while (j < nsubcomm - nleftover){
2556         send_rank[nsends++] = i;
2557         i--; j++;
2558       }
2559     }
2560 
2561     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2562       for (i=0; i<nleftover; i++){
2563         recv_rank[nrecvs++] = size-nleftover+i;
2564       }
2565     }
2566 
2567     /* allocate sbuf_j, sbuf_a */
2568     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2569     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2570     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2571   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2572 
2573   /* copy mat's local entries into the buffers */
2574   if (reuse == MAT_INITIAL_MATRIX){
2575     rownz_max = 0;
2576     rptr = sbuf_j;
2577     cols = sbuf_j + rend-rstart + 1;
2578     vals = sbuf_a;
2579     rptr[0] = 0;
2580     for (i=0; i<rend-rstart; i++){
2581       row = i + rstart;
2582       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2583       ncols  = nzA + nzB;
2584       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2585       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2586       /* load the column indices for this row into cols */
2587       lwrite = 0;
2588       for (l=0; l<nzB; l++) {
2589         if ((ctmp = bmap[cworkB[l]]) < cstart){
2590           vals[lwrite]   = aworkB[l];
2591           cols[lwrite++] = ctmp;
2592         }
2593       }
2594       for (l=0; l<nzA; l++){
2595         vals[lwrite]   = aworkA[l];
2596         cols[lwrite++] = cstart + cworkA[l];
2597       }
2598       for (l=0; l<nzB; l++) {
2599         if ((ctmp = bmap[cworkB[l]]) >= cend){
2600           vals[lwrite]   = aworkB[l];
2601           cols[lwrite++] = ctmp;
2602         }
2603       }
2604       vals += ncols;
2605       cols += ncols;
2606       rptr[i+1] = rptr[i] + ncols;
2607       if (rownz_max < ncols) rownz_max = ncols;
2608     }
2609     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2610   } else { /* only copy matrix values into sbuf_a */
2611     rptr = sbuf_j;
2612     vals = sbuf_a;
2613     rptr[0] = 0;
2614     for (i=0; i<rend-rstart; i++){
2615       row = i + rstart;
2616       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2617       ncols  = nzA + nzB;
2618       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2619       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2620       lwrite = 0;
2621       for (l=0; l<nzB; l++) {
2622         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2623       }
2624       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2625       for (l=0; l<nzB; l++) {
2626         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2627       }
2628       vals += ncols;
2629       rptr[i+1] = rptr[i] + ncols;
2630     }
2631   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2632 
2633   /* send nzlocal to others, and recv other's nzlocal */
2634   /*--------------------------------------------------*/
2635   if (reuse == MAT_INITIAL_MATRIX){
2636     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2637     s_waits2 = s_waits3 + nsends;
2638     s_waits1 = s_waits2 + nsends;
2639     r_waits1 = s_waits1 + nsends;
2640     r_waits2 = r_waits1 + nrecvs;
2641     r_waits3 = r_waits2 + nrecvs;
2642   } else {
2643     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2644     r_waits3 = s_waits3 + nsends;
2645   }
2646 
2647   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2648   if (reuse == MAT_INITIAL_MATRIX){
2649     /* get new tags to keep the communication clean */
2650     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2651     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2652     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2653 
2654     /* post receives of other's nzlocal */
2655     for (i=0; i<nrecvs; i++){
2656       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2657     }
2658     /* send nzlocal to others */
2659     for (i=0; i<nsends; i++){
2660       sbuf_nz[i] = nzlocal;
2661       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2662     }
2663     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2664     count = nrecvs;
2665     while (count) {
2666       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2667       recv_rank[imdex] = recv_status.MPI_SOURCE;
2668       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2669       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2670 
2671       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2672       rbuf_nz[imdex] += i + 2;
2673       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2674       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2675       count--;
2676     }
2677     /* wait on sends of nzlocal */
2678     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2679     /* send mat->i,j to others, and recv from other's */
2680     /*------------------------------------------------*/
2681     for (i=0; i<nsends; i++){
2682       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2683       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2684     }
2685     /* wait on receives of mat->i,j */
2686     /*------------------------------*/
2687     count = nrecvs;
2688     while (count) {
2689       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2690       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2691       count--;
2692     }
2693     /* wait on sends of mat->i,j */
2694     /*---------------------------*/
2695     if (nsends) {
2696       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2697     }
2698   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2699 
2700   /* post receives, send and receive mat->a */
2701   /*----------------------------------------*/
2702   for (imdex=0; imdex<nrecvs; imdex++) {
2703     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2704   }
2705   for (i=0; i<nsends; i++){
2706     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2707   }
2708   count = nrecvs;
2709   while (count) {
2710     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2711     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2712     count--;
2713   }
2714   if (nsends) {
2715     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2716   }
2717 
2718   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2719 
2720   /* create redundant matrix */
2721   /*-------------------------*/
2722   if (reuse == MAT_INITIAL_MATRIX){
2723     /* compute rownz_max for preallocation */
2724     for (imdex=0; imdex<nrecvs; imdex++){
2725       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2726       rptr = rbuf_j[imdex];
2727       for (i=0; i<j; i++){
2728         ncols = rptr[i+1] - rptr[i];
2729         if (rownz_max < ncols) rownz_max = ncols;
2730       }
2731     }
2732 
2733     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2734     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2735     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2736     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2737     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2738     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2739   } else {
2740     C = *matredundant;
2741   }
2742 
2743   /* insert local matrix entries */
2744   rptr = sbuf_j;
2745   cols = sbuf_j + rend-rstart + 1;
2746   vals = sbuf_a;
2747   for (i=0; i<rend-rstart; i++){
2748     row   = i + rstart;
2749     ncols = rptr[i+1] - rptr[i];
2750     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2751     vals += ncols;
2752     cols += ncols;
2753   }
2754   /* insert received matrix entries */
2755   for (imdex=0; imdex<nrecvs; imdex++){
2756     rstart = rowrange[recv_rank[imdex]];
2757     rend   = rowrange[recv_rank[imdex]+1];
2758     rptr = rbuf_j[imdex];
2759     cols = rbuf_j[imdex] + rend-rstart + 1;
2760     vals = rbuf_a[imdex];
2761     for (i=0; i<rend-rstart; i++){
2762       row   = i + rstart;
2763       ncols = rptr[i+1] - rptr[i];
2764       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2765       vals += ncols;
2766       cols += ncols;
2767     }
2768   }
2769   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2770   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2771   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2772   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2773   if (reuse == MAT_INITIAL_MATRIX) {
2774     PetscContainer container;
2775     *matredundant = C;
2776     /* create a supporting struct and attach it to C for reuse */
2777     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2778     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2779     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2780     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2781     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2782     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2783 
2784     redund->nzlocal = nzlocal;
2785     redund->nsends  = nsends;
2786     redund->nrecvs  = nrecvs;
2787     redund->send_rank = send_rank;
2788     redund->recv_rank = recv_rank;
2789     redund->sbuf_nz = sbuf_nz;
2790     redund->rbuf_nz = rbuf_nz;
2791     redund->sbuf_j  = sbuf_j;
2792     redund->sbuf_a  = sbuf_a;
2793     redund->rbuf_j  = rbuf_j;
2794     redund->rbuf_a  = rbuf_a;
2795 
2796     redund->Destroy = C->ops->destroy;
2797     C->ops->destroy = MatDestroy_MatRedundant;
2798   }
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2804 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2805 {
2806   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2807   PetscErrorCode ierr;
2808   PetscInt       i,*idxb = 0;
2809   PetscScalar    *va,*vb;
2810   Vec            vtmp;
2811 
2812   PetscFunctionBegin;
2813   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2814   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2815   if (idx) {
2816     for (i=0; i<A->rmap->n; i++) {
2817       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2818     }
2819   }
2820 
2821   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2822   if (idx) {
2823     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2824   }
2825   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2826   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2827 
2828   for (i=0; i<A->rmap->n; i++){
2829     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2830       va[i] = vb[i];
2831       if (idx) idx[i] = a->garray[idxb[i]];
2832     }
2833   }
2834 
2835   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2836   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2837   ierr = PetscFree(idxb);CHKERRQ(ierr);
2838   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2844 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2845 {
2846   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2847   PetscErrorCode ierr;
2848   PetscInt       i,*idxb = 0;
2849   PetscScalar    *va,*vb;
2850   Vec            vtmp;
2851 
2852   PetscFunctionBegin;
2853   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2854   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2855   if (idx) {
2856     for (i=0; i<A->rmap->n; i++) {
2857       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2858     }
2859   }
2860 
2861   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2862   if (idx) {
2863     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2864   }
2865   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2866   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2867 
2868   for (i=0; i<A->rmap->n; i++){
2869     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2870       va[i] = vb[i];
2871       if (idx) idx[i] = a->garray[idxb[i]];
2872     }
2873   }
2874 
2875   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2876   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2877   ierr = PetscFree(idxb);CHKERRQ(ierr);
2878   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 #undef __FUNCT__
2883 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2884 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2885 {
2886   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2887   PetscInt       n      = A->rmap->n;
2888   PetscInt       cstart = A->cmap->rstart;
2889   PetscInt      *cmap   = mat->garray;
2890   PetscInt      *diagIdx, *offdiagIdx;
2891   Vec            diagV, offdiagV;
2892   PetscScalar   *a, *diagA, *offdiagA;
2893   PetscInt       r;
2894   PetscErrorCode ierr;
2895 
2896   PetscFunctionBegin;
2897   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2898   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2899   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2900   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2901   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2902   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2903   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2904   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2905   for(r = 0; r < n; ++r) {
2906     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2907       a[r]   = diagA[r];
2908       idx[r] = cstart + diagIdx[r];
2909     } else {
2910       a[r]   = offdiagA[r];
2911       idx[r] = cmap[offdiagIdx[r]];
2912     }
2913   }
2914   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2915   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2916   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2917   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2918   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2919   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2920   PetscFunctionReturn(0);
2921 }
2922 
2923 #undef __FUNCT__
2924 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2925 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2926 {
2927   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2928   PetscInt       n      = A->rmap->n;
2929   PetscInt       cstart = A->cmap->rstart;
2930   PetscInt      *cmap   = mat->garray;
2931   PetscInt      *diagIdx, *offdiagIdx;
2932   Vec            diagV, offdiagV;
2933   PetscScalar   *a, *diagA, *offdiagA;
2934   PetscInt       r;
2935   PetscErrorCode ierr;
2936 
2937   PetscFunctionBegin;
2938   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2939   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2940   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2941   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2942   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2943   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2944   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2945   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2946   for(r = 0; r < n; ++r) {
2947     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2948       a[r]   = diagA[r];
2949       idx[r] = cstart + diagIdx[r];
2950     } else {
2951       a[r]   = offdiagA[r];
2952       idx[r] = cmap[offdiagIdx[r]];
2953     }
2954   }
2955   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2956   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2957   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2958   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2959   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2960   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 #undef __FUNCT__
2965 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2966 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2967 {
2968   PetscErrorCode ierr;
2969   Mat            *dummy;
2970 
2971   PetscFunctionBegin;
2972   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2973   *newmat = *dummy;
2974   ierr = PetscFree(dummy);CHKERRQ(ierr);
2975   PetscFunctionReturn(0);
2976 }
2977 
2978 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2979 
2980 #undef __FUNCT__
2981 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2982 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2983 {
2984   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2985   PetscErrorCode ierr;
2986 
2987   PetscFunctionBegin;
2988   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 
2993 /* -------------------------------------------------------------------*/
2994 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2995        MatGetRow_MPIAIJ,
2996        MatRestoreRow_MPIAIJ,
2997        MatMult_MPIAIJ,
2998 /* 4*/ MatMultAdd_MPIAIJ,
2999        MatMultTranspose_MPIAIJ,
3000        MatMultTransposeAdd_MPIAIJ,
3001 #ifdef PETSC_HAVE_PBGL
3002        MatSolve_MPIAIJ,
3003 #else
3004        0,
3005 #endif
3006        0,
3007        0,
3008 /*10*/ 0,
3009        0,
3010        0,
3011        MatSOR_MPIAIJ,
3012        MatTranspose_MPIAIJ,
3013 /*15*/ MatGetInfo_MPIAIJ,
3014        MatEqual_MPIAIJ,
3015        MatGetDiagonal_MPIAIJ,
3016        MatDiagonalScale_MPIAIJ,
3017        MatNorm_MPIAIJ,
3018 /*20*/ MatAssemblyBegin_MPIAIJ,
3019        MatAssemblyEnd_MPIAIJ,
3020        MatSetOption_MPIAIJ,
3021        MatZeroEntries_MPIAIJ,
3022 /*24*/ MatZeroRows_MPIAIJ,
3023        0,
3024 #ifdef PETSC_HAVE_PBGL
3025        0,
3026 #else
3027        0,
3028 #endif
3029        0,
3030        0,
3031 /*29*/ MatSetUp_MPIAIJ,
3032 #ifdef PETSC_HAVE_PBGL
3033        0,
3034 #else
3035        0,
3036 #endif
3037        0,
3038        0,
3039        0,
3040 /*34*/ MatDuplicate_MPIAIJ,
3041        0,
3042        0,
3043        0,
3044        0,
3045 /*39*/ MatAXPY_MPIAIJ,
3046        MatGetSubMatrices_MPIAIJ,
3047        MatIncreaseOverlap_MPIAIJ,
3048        MatGetValues_MPIAIJ,
3049        MatCopy_MPIAIJ,
3050 /*44*/ MatGetRowMax_MPIAIJ,
3051        MatScale_MPIAIJ,
3052        0,
3053        0,
3054        MatZeroRowsColumns_MPIAIJ,
3055 /*49*/ 0,
3056        0,
3057        0,
3058        0,
3059        0,
3060 /*54*/ MatFDColoringCreate_MPIAIJ,
3061        0,
3062        MatSetUnfactored_MPIAIJ,
3063        0, /* MatPermute_MPIAIJ, impl currently broken */
3064        0,
3065 /*59*/ MatGetSubMatrix_MPIAIJ,
3066        MatDestroy_MPIAIJ,
3067        MatView_MPIAIJ,
3068        0,
3069        0,
3070 /*64*/ 0,
3071        0,
3072        0,
3073        0,
3074        0,
3075 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3076        MatGetRowMinAbs_MPIAIJ,
3077        0,
3078        MatSetColoring_MPIAIJ,
3079 #if defined(PETSC_HAVE_ADIC)
3080        MatSetValuesAdic_MPIAIJ,
3081 #else
3082        0,
3083 #endif
3084        MatSetValuesAdifor_MPIAIJ,
3085 /*75*/ MatFDColoringApply_AIJ,
3086        0,
3087        0,
3088        0,
3089        0,
3090 /*80*/ 0,
3091        0,
3092        0,
3093 /*83*/ MatLoad_MPIAIJ,
3094        0,
3095        0,
3096        0,
3097        0,
3098        0,
3099 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3100        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3101        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3102        MatPtAP_Basic,
3103        MatPtAPSymbolic_MPIAIJ,
3104 /*94*/ MatPtAPNumeric_MPIAIJ,
3105        0,
3106        0,
3107        0,
3108        0,
3109 /*99*/ 0,
3110        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3111        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3112        MatConjugate_MPIAIJ,
3113        0,
3114 /*104*/MatSetValuesRow_MPIAIJ,
3115        MatRealPart_MPIAIJ,
3116        MatImaginaryPart_MPIAIJ,
3117        0,
3118        0,
3119 /*109*/0,
3120        MatGetRedundantMatrix_MPIAIJ,
3121        MatGetRowMin_MPIAIJ,
3122        0,
3123        0,
3124 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3125        0,
3126        0,
3127        0,
3128        0,
3129 /*119*/0,
3130        0,
3131        0,
3132        0,
3133        MatGetMultiProcBlock_MPIAIJ,
3134 /*124*/MatFindNonZeroRows_MPIAIJ,
3135        MatGetColumnNorms_MPIAIJ,
3136        MatInvertBlockDiagonal_MPIAIJ,
3137        0,
3138        MatGetSubMatricesParallel_MPIAIJ,
3139 /*129*/0,
3140        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3141        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3142        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3143        0,
3144 /*134*/0,
3145        0,
3146        0,
3147        0,
3148        0
3149 };
3150 
3151 /* ----------------------------------------------------------------------------------------*/
3152 
3153 EXTERN_C_BEGIN
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3156 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3157 {
3158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3159   PetscErrorCode ierr;
3160 
3161   PetscFunctionBegin;
3162   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3163   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3164   PetscFunctionReturn(0);
3165 }
3166 EXTERN_C_END
3167 
3168 EXTERN_C_BEGIN
3169 #undef __FUNCT__
3170 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3171 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3172 {
3173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3174   PetscErrorCode ierr;
3175 
3176   PetscFunctionBegin;
3177   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3178   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3179   PetscFunctionReturn(0);
3180 }
3181 EXTERN_C_END
3182 
3183 EXTERN_C_BEGIN
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3186 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3187 {
3188   Mat_MPIAIJ     *b;
3189   PetscErrorCode ierr;
3190   PetscInt       i;
3191   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3192 
3193   PetscFunctionBegin;
3194   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3195   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3196   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3197   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3198   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3199   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3200 
3201   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3202   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3203   if (d_nnz) {
3204     for (i=0; i<B->rmap->n; i++) {
3205       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3206     }
3207   }
3208   if (o_nnz) {
3209     for (i=0; i<B->rmap->n; i++) {
3210       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3211     }
3212   }
3213   b = (Mat_MPIAIJ*)B->data;
3214 
3215   if (!B->preallocated) {
3216     /* Explicitly create 2 MATSEQAIJ matrices. */
3217     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3218     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3219     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3220     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3221     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3222     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3223     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3224     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3225     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3226     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3227   }
3228 
3229   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3230   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3231   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3232   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3233   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3234   B->preallocated = PETSC_TRUE;
3235   PetscFunctionReturn(0);
3236 }
3237 EXTERN_C_END
3238 
3239 #undef __FUNCT__
3240 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3241 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3242 {
3243   Mat            mat;
3244   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3245   PetscErrorCode ierr;
3246 
3247   PetscFunctionBegin;
3248   *newmat       = 0;
3249   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3250   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3251   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3252   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3253   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3254   a    = (Mat_MPIAIJ*)mat->data;
3255 
3256   mat->factortype    = matin->factortype;
3257   mat->rmap->bs      = matin->rmap->bs;
3258   mat->cmap->bs      = matin->cmap->bs;
3259   mat->assembled    = PETSC_TRUE;
3260   mat->insertmode   = NOT_SET_VALUES;
3261   mat->preallocated = PETSC_TRUE;
3262 
3263   a->size           = oldmat->size;
3264   a->rank           = oldmat->rank;
3265   a->donotstash     = oldmat->donotstash;
3266   a->roworiented    = oldmat->roworiented;
3267   a->rowindices     = 0;
3268   a->rowvalues      = 0;
3269   a->getrowactive   = PETSC_FALSE;
3270 
3271   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3272   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3273 
3274   if (oldmat->colmap) {
3275 #if defined (PETSC_USE_CTABLE)
3276     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3277 #else
3278     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3279     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3280     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3281 #endif
3282   } else a->colmap = 0;
3283   if (oldmat->garray) {
3284     PetscInt len;
3285     len  = oldmat->B->cmap->n;
3286     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3287     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3288     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3289   } else a->garray = 0;
3290 
3291   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3292   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3293   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3294   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3295   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3296   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3297   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3298   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3299   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3300   *newmat = mat;
3301   PetscFunctionReturn(0);
3302 }
3303 
3304 
3305 
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatLoad_MPIAIJ"
3308 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3309 {
3310   PetscScalar    *vals,*svals;
3311   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3312   PetscErrorCode ierr;
3313   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3314   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3315   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3316   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3317   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3318   int            fd;
3319   PetscInt       bs = 1;
3320 
3321   PetscFunctionBegin;
3322   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3323   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3324   if (!rank) {
3325     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3326     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3327     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3328   }
3329 
3330   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3331   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3332   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3333 
3334   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3335 
3336   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3337   M = header[1]; N = header[2];
3338   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3339   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3340   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3341 
3342   /* If global sizes are set, check if they are consistent with that given in the file */
3343   if (sizesset) {
3344     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3345   }
3346   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3347   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3348 
3349   /* determine ownership of all (block) rows */
3350   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3351   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3352   else m = newMat->rmap->n; /* Set by user */
3353 
3354   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3355   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3356 
3357   /* First process needs enough room for process with most rows */
3358   if (!rank) {
3359     mmax       = rowners[1];
3360     for (i=2; i<=size; i++) {
3361       mmax = PetscMax(mmax,rowners[i]);
3362     }
3363   } else mmax = m;
3364 
3365   rowners[0] = 0;
3366   for (i=2; i<=size; i++) {
3367     rowners[i] += rowners[i-1];
3368   }
3369   rstart = rowners[rank];
3370   rend   = rowners[rank+1];
3371 
3372   /* distribute row lengths to all processors */
3373   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3374   if (!rank) {
3375     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3376     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3377     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3378     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3379     for (j=0; j<m; j++) {
3380       procsnz[0] += ourlens[j];
3381     }
3382     for (i=1; i<size; i++) {
3383       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3384       /* calculate the number of nonzeros on each processor */
3385       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3386         procsnz[i] += rowlengths[j];
3387       }
3388       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3389     }
3390     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3391   } else {
3392     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3393   }
3394 
3395   if (!rank) {
3396     /* determine max buffer needed and allocate it */
3397     maxnz = 0;
3398     for (i=0; i<size; i++) {
3399       maxnz = PetscMax(maxnz,procsnz[i]);
3400     }
3401     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3402 
3403     /* read in my part of the matrix column indices  */
3404     nz   = procsnz[0];
3405     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3406     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3407 
3408     /* read in everyone else's and ship it off */
3409     for (i=1; i<size; i++) {
3410       nz     = procsnz[i];
3411       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3412       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3413     }
3414     ierr = PetscFree(cols);CHKERRQ(ierr);
3415   } else {
3416     /* determine buffer space needed for message */
3417     nz = 0;
3418     for (i=0; i<m; i++) {
3419       nz += ourlens[i];
3420     }
3421     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3422 
3423     /* receive message of column indices*/
3424     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3425   }
3426 
3427   /* determine column ownership if matrix is not square */
3428   if (N != M) {
3429     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3430     else n = newMat->cmap->n;
3431     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3432     cstart = cend - n;
3433   } else {
3434     cstart = rstart;
3435     cend   = rend;
3436     n      = cend - cstart;
3437   }
3438 
3439   /* loop over local rows, determining number of off diagonal entries */
3440   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3441   jj = 0;
3442   for (i=0; i<m; i++) {
3443     for (j=0; j<ourlens[i]; j++) {
3444       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3445       jj++;
3446     }
3447   }
3448 
3449   for (i=0; i<m; i++) {
3450     ourlens[i] -= offlens[i];
3451   }
3452   if (!sizesset) {
3453     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3454   }
3455 
3456   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3457 
3458   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3459 
3460   for (i=0; i<m; i++) {
3461     ourlens[i] += offlens[i];
3462   }
3463 
3464   if (!rank) {
3465     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3466 
3467     /* read in my part of the matrix numerical values  */
3468     nz   = procsnz[0];
3469     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3470 
3471     /* insert into matrix */
3472     jj      = rstart;
3473     smycols = mycols;
3474     svals   = vals;
3475     for (i=0; i<m; i++) {
3476       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3477       smycols += ourlens[i];
3478       svals   += ourlens[i];
3479       jj++;
3480     }
3481 
3482     /* read in other processors and ship out */
3483     for (i=1; i<size; i++) {
3484       nz     = procsnz[i];
3485       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3486       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3487     }
3488     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3489   } else {
3490     /* receive numeric values */
3491     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3492 
3493     /* receive message of values*/
3494     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3495 
3496     /* insert into matrix */
3497     jj      = rstart;
3498     smycols = mycols;
3499     svals   = vals;
3500     for (i=0; i<m; i++) {
3501       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3502       smycols += ourlens[i];
3503       svals   += ourlens[i];
3504       jj++;
3505     }
3506   }
3507   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3508   ierr = PetscFree(vals);CHKERRQ(ierr);
3509   ierr = PetscFree(mycols);CHKERRQ(ierr);
3510   ierr = PetscFree(rowners);CHKERRQ(ierr);
3511 
3512   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3513   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3514 
3515   PetscFunctionReturn(0);
3516 }
3517 
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3520 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3521 {
3522   PetscErrorCode ierr;
3523   IS             iscol_local;
3524   PetscInt       csize;
3525 
3526   PetscFunctionBegin;
3527   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3528   if (call == MAT_REUSE_MATRIX) {
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3530     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531   } else {
3532     PetscInt cbs;
3533     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3534     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3535     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3536   }
3537   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3538   if (call == MAT_INITIAL_MATRIX) {
3539     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3540     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3541   }
3542   PetscFunctionReturn(0);
3543 }
3544 
3545 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3548 /*
3549     Not great since it makes two copies of the submatrix: first a SeqAIJ
3550   on each process, and then the end result obtained by concatenating the local matrices.
3551   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3552 
3553   Note: This requires a sequential iscol with all indices.
3554 */
3555 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3556 {
3557   PetscErrorCode ierr;
3558   PetscMPIInt    rank,size;
3559   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3560   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3561   PetscBool      allcolumns, colflag;
3562   Mat            M,Mreuse;
3563   MatScalar      *vwork,*aa;
3564   MPI_Comm       comm = ((PetscObject)mat)->comm;
3565   Mat_SeqAIJ     *aij;
3566 
3567 
3568   PetscFunctionBegin;
3569   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3570   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3571 
3572   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3573   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3574   if (colflag && ncol == mat->cmap->N){
3575     allcolumns = PETSC_TRUE;
3576   } else {
3577     allcolumns = PETSC_FALSE;
3578   }
3579   if (call ==  MAT_REUSE_MATRIX) {
3580     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3581     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3582     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3583   } else {
3584     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3585   }
3586 
3587   /*
3588       m - number of local rows
3589       n - number of columns (same on all processors)
3590       rstart - first row in new global matrix generated
3591   */
3592   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3593   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3594   if (call == MAT_INITIAL_MATRIX) {
3595     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3596     ii  = aij->i;
3597     jj  = aij->j;
3598 
3599     /*
3600         Determine the number of non-zeros in the diagonal and off-diagonal
3601         portions of the matrix in order to do correct preallocation
3602     */
3603 
3604     /* first get start and end of "diagonal" columns */
3605     if (csize == PETSC_DECIDE) {
3606       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3607       if (mglobal == n) { /* square matrix */
3608 	nlocal = m;
3609       } else {
3610         nlocal = n/size + ((n % size) > rank);
3611       }
3612     } else {
3613       nlocal = csize;
3614     }
3615     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3616     rstart = rend - nlocal;
3617     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3618 
3619     /* next, compute all the lengths */
3620     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3621     olens = dlens + m;
3622     for (i=0; i<m; i++) {
3623       jend = ii[i+1] - ii[i];
3624       olen = 0;
3625       dlen = 0;
3626       for (j=0; j<jend; j++) {
3627         if (*jj < rstart || *jj >= rend) olen++;
3628         else dlen++;
3629         jj++;
3630       }
3631       olens[i] = olen;
3632       dlens[i] = dlen;
3633     }
3634     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3635     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3636     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3637     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3638     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3639     ierr = PetscFree(dlens);CHKERRQ(ierr);
3640   } else {
3641     PetscInt ml,nl;
3642 
3643     M = *newmat;
3644     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3645     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3646     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3647     /*
3648          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3649        rather than the slower MatSetValues().
3650     */
3651     M->was_assembled = PETSC_TRUE;
3652     M->assembled     = PETSC_FALSE;
3653   }
3654   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3655   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3656   ii  = aij->i;
3657   jj  = aij->j;
3658   aa  = aij->a;
3659   for (i=0; i<m; i++) {
3660     row   = rstart + i;
3661     nz    = ii[i+1] - ii[i];
3662     cwork = jj;     jj += nz;
3663     vwork = aa;     aa += nz;
3664     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3665   }
3666 
3667   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3668   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3669   *newmat = M;
3670 
3671   /* save submatrix used in processor for next request */
3672   if (call ==  MAT_INITIAL_MATRIX) {
3673     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3674     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3675   }
3676 
3677   PetscFunctionReturn(0);
3678 }
3679 
3680 EXTERN_C_BEGIN
3681 #undef __FUNCT__
3682 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3683 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3684 {
3685   PetscInt       m,cstart, cend,j,nnz,i,d;
3686   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3687   const PetscInt *JJ;
3688   PetscScalar    *values;
3689   PetscErrorCode ierr;
3690 
3691   PetscFunctionBegin;
3692   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3693 
3694   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3695   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3696   m      = B->rmap->n;
3697   cstart = B->cmap->rstart;
3698   cend   = B->cmap->rend;
3699   rstart = B->rmap->rstart;
3700 
3701   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3702 
3703 #if defined(PETSC_USE_DEBUG)
3704   for (i=0; i<m; i++) {
3705     nnz     = Ii[i+1]- Ii[i];
3706     JJ      = J + Ii[i];
3707     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3708     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3709     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3710   }
3711 #endif
3712 
3713   for (i=0; i<m; i++) {
3714     nnz     = Ii[i+1]- Ii[i];
3715     JJ      = J + Ii[i];
3716     nnz_max = PetscMax(nnz_max,nnz);
3717     d       = 0;
3718     for (j=0; j<nnz; j++) {
3719       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3720     }
3721     d_nnz[i] = d;
3722     o_nnz[i] = nnz - d;
3723   }
3724   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3725   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3726 
3727   if (v) values = (PetscScalar*)v;
3728   else {
3729     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3730     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3731   }
3732 
3733   for (i=0; i<m; i++) {
3734     ii   = i + rstart;
3735     nnz  = Ii[i+1]- Ii[i];
3736     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3737   }
3738   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3739   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3740 
3741   if (!v) {
3742     ierr = PetscFree(values);CHKERRQ(ierr);
3743   }
3744   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3745   PetscFunctionReturn(0);
3746 }
3747 EXTERN_C_END
3748 
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3751 /*@
3752    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3753    (the default parallel PETSc format).
3754 
3755    Collective on MPI_Comm
3756 
3757    Input Parameters:
3758 +  B - the matrix
3759 .  i - the indices into j for the start of each local row (starts with zero)
3760 .  j - the column indices for each local row (starts with zero)
3761 -  v - optional values in the matrix
3762 
3763    Level: developer
3764 
3765    Notes:
3766        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3767      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3768      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3769 
3770        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3771 
3772        The format which is used for the sparse matrix input is equivalent to a
3773     row-major ordering, i.e., for the following matrix, the input data expected is
3774     as shown:
3775 
3776         1 0 0
3777         2 0 3     P0
3778        -------
3779         4 5 6     P1
3780 
3781      Process0 [P0]: rows_owned=[0,1]
3782         i =  {0,1,3}  [size = nrow+1  = 2+1]
3783         j =  {0,0,2}  [size = nz = 3]
3784         v =  {1,2,3}  [size = nz = 3]
3785 
3786      Process1 [P1]: rows_owned=[2]
3787         i =  {0,3}    [size = nrow+1  = 1+1]
3788         j =  {0,1,2}  [size = nz = 3]
3789         v =  {4,5,6}  [size = nz = 3]
3790 
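     For illustration only, a minimal sketch of the calls process P0 above might make
   (error checking omitted; the names A, ii, jj, and vv are placeholders, and P1 would
   pass its own local arrays in the same way):

.vb
     Mat         A;
     PetscInt    ii[] = {0,1,3}, jj[] = {0,0,2};   /* P0's local CSR rows from the example */
     PetscScalar vv[] = {1,2,3};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,2,PETSC_DECIDE,3,3);            /* P0 owns 2 of the 3 global rows */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,ii,jj,vv);     /* also inserts the values and assembles */
.ve
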
3791 .keywords: matrix, aij, compressed row, sparse, parallel
3792 
3793 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3794           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3795 @*/
3796 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3797 {
3798   PetscErrorCode ierr;
3799 
3800   PetscFunctionBegin;
3801   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3807 /*@C
3808    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3809    (the default parallel PETSc format).  For good matrix assembly performance
3810    the user should preallocate the matrix storage by setting the parameters
3811    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3812    performance can be increased by more than a factor of 50.
3813 
3814    Collective on MPI_Comm
3815 
3816    Input Parameters:
3817 +  A - the matrix
3818 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3819            (same value is used for all local rows)
3820 .  d_nnz - array containing the number of nonzeros in the various rows of the
3821            DIAGONAL portion of the local submatrix (possibly different for each row)
3822            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3823            The size of this array is equal to the number of local rows, i.e 'm'.
3824            For matrices that will be factored, you must leave room for (and set)
3825            the diagonal entry even if it is zero.
3826 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3827            submatrix (same value is used for all local rows).
3828 -  o_nnz - array containing the number of nonzeros in the various rows of the
3829            OFF-DIAGONAL portion of the local submatrix (possibly different for
3830            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3831            structure. The size of this array is equal to the number
3832            of local rows, i.e 'm'.
3833 
3834    If the *_nnz parameter is given then the *_nz parameter is ignored
3835 
3836    The AIJ format (also called the Yale sparse matrix format or
3837    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3838    storage.  The stored row and column indices begin with zero.
3839    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3840 
3841    The parallel matrix is partitioned such that the first m0 rows belong to
3842    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3843    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3844 
3845    The DIAGONAL portion of the local submatrix of a processor can be defined
3846    as the submatrix which is obtained by extracting the part corresponding to
3847    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3848    first row that belongs to the processor, r2 is the last row belonging to
3849    this processor, and c1-c2 is the range of indices of the local part of a
3850    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3851    common case of a square matrix, the row and column ranges are the same and
3852    the DIAGONAL part is also square. The remaining portion of the local
3853    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3854 
3855    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3856 
3857    You can call MatGetInfo() to get information on how effective the preallocation was;
3858    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3859    You can also run with the option -info and look for messages with the string
3860    malloc in them to see if additional memory allocation was needed.
3861 
3862    Example usage:
3863 
3864    Consider the following 8x8 matrix with 34 non-zero values, that is
3865    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3866    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3867    as follows:
3868 
3869 .vb
3870             1  2  0  |  0  3  0  |  0  4
3871     Proc0   0  5  6  |  7  0  0  |  8  0
3872             9  0 10  | 11  0  0  | 12  0
3873     -------------------------------------
3874            13  0 14  | 15 16 17  |  0  0
3875     Proc1   0 18  0  | 19 20 21  |  0  0
3876             0  0  0  | 22 23  0  | 24  0
3877     -------------------------------------
3878     Proc2  25 26 27  |  0  0 28  | 29  0
3879            30  0  0  | 31 32 33  |  0 34
3880 .ve
3881 
3882    This can be represented as a collection of submatrices as:
3883 
3884 .vb
3885       A B C
3886       D E F
3887       G H I
3888 .ve
3889 
3890    Where the submatrices A,B,C are owned by proc0, D,E,F are
3891    owned by proc1, G,H,I are owned by proc2.
3892 
3893    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3894    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3895    The 'M','N' parameters are 8,8, and have the same values on all procs.
3896 
3897    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3898    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3899    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3900    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3901    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3902    matrix, and [DF] as another SeqAIJ matrix.
3903 
3904    When d_nz, o_nz parameters are specified, d_nz storage elements are
3905    allocated for every row of the local diagonal submatrix, and o_nz
3906    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3907    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3908    local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3909    In this case, the values of d_nz,o_nz are:
3910 .vb
3911      proc0 : dnz = 2, o_nz = 2
3912      proc1 : dnz = 3, o_nz = 2
3913      proc2 : dnz = 1, o_nz = 4
3914 .ve
3915    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3916    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3917    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3918    34 values.
3919 
3920    When d_nnz, o_nnz parameters are specified, the storage is specified
3921    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3922    In the above case the values for d_nnz,o_nnz are:
3923 .vb
3924      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3925      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3926      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3927 .ve
3928    Here the space allocated is the sum of all the above values, i.e., 34, and
3929    hence pre-allocation is perfect.
3930 
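   For illustration only, a rough sketch of the calls proc0 in the example above might
   make when using the d_nnz/o_nnz form (error checking omitted; A is a placeholder name):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0's rows of the 8x8 example */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                          /* proc0: m = n = 3, M = N = 8 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);    /* d_nz/o_nz ignored since the arrays are given */
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
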
3931    Level: intermediate
3932 
3933 .keywords: matrix, aij, compressed row, sparse, parallel
3934 
3935 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3936           MPIAIJ, MatGetInfo()
3937 @*/
3938 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3939 {
3940   PetscErrorCode ierr;
3941 
3942   PetscFunctionBegin;
3943   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3944   PetscValidType(B,1);
3945   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3946   PetscFunctionReturn(0);
3947 }
3948 
3949 #undef __FUNCT__
3950 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3951 /*@
3952      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3953          CSR format the local rows.
3954 
3955    Collective on MPI_Comm
3956 
3957    Input Parameters:
3958 +  comm - MPI communicator
3959 .  m - number of local rows (Cannot be PETSC_DECIDE)
3960 .  n - This value should be the same as the local size used in creating the
3961        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3962        calculated if N is given) For square matrices n is almost always m.
3963 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3964 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3965 .   i - row indices
3966 .   j - column indices
3967 -   a - matrix values
3968 
3969    Output Parameter:
3970 .   mat - the matrix
3971 
3972    Level: intermediate
3973 
3974    Notes:
3975        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3976      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3977      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3978 
3979        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3980 
3981        The format which is used for the sparse matrix input is equivalent to a
3982     row-major ordering, i.e., for the following matrix, the input data expected is
3983     as shown:
3984 
3985         1 0 0
3986         2 0 3     P0
3987        -------
3988         4 5 6     P1
3989 
3990      Process0 [P0]: rows_owned=[0,1]
3991         i =  {0,1,3}  [size = nrow+1  = 2+1]
3992        j =  {0,0,2}  [size = nz = 3]
3993        v =  {1,2,3}  [size = nz = 3]
3994 
3995      Process1 [P1]: rows_owned=[2]
3996         i =  {0,3}    [size = nrow+1  = 1+1]
3997        j =  {0,1,2}  [size = nz = 3]
3998        v =  {4,5,6}  [size = nz = 3]
3999 
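   For illustration only, a minimal sketch of the single call process P0 above might make
   (error checking omitted; i0, j0, and v0 are placeholder names for P0's local arrays,
   and P1 would call the routine with m = 1 and its own arrays):

.vb
     Mat         A;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscScalar v0[] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);
.ve
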
4000 .keywords: matrix, aij, compressed row, sparse, parallel
4001 
4002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4003           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4004 @*/
4005 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4006 {
4007   PetscErrorCode ierr;
4008 
4009   PetscFunctionBegin;
4010   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4011   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4012   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4013   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4014   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4015   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4016   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4017   PetscFunctionReturn(0);
4018 }
4019 
4020 #undef __FUNCT__
4021 #define __FUNCT__ "MatCreateAIJ"
4022 /*@C
4023    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4024    (the default parallel PETSc format).  For good matrix assembly performance
4025    the user should preallocate the matrix storage by setting the parameters
4026    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4027    performance can be increased by more than a factor of 50.
4028 
4029    Collective on MPI_Comm
4030 
4031    Input Parameters:
4032 +  comm - MPI communicator
4033 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4034            This value should be the same as the local size used in creating the
4035            y vector for the matrix-vector product y = Ax.
4036 .  n - This value should be the same as the local size used in creating the
4037        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4038        calculated if N is given). For square matrices n is almost always m.
4039 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4040 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4041 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4042            (same value is used for all local rows)
4043 .  d_nnz - array containing the number of nonzeros in the various rows of the
4044            DIAGONAL portion of the local submatrix (possibly different for each row)
4045            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4046            The size of this array is equal to the number of local rows, i.e 'm'.
4047 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4048            submatrix (same value is used for all local rows).
4049 -  o_nnz - array containing the number of nonzeros in the various rows of the
4050            OFF-DIAGONAL portion of the local submatrix (possibly different for
4051            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4052            structure. The size of this array is equal to the number
4053            of local rows, i.e 'm'.
4054 
4055    Output Parameter:
4056 .  A - the matrix
4057 
4058    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4059    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4060    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4061 
4062    Notes:
4063    If the *_nnz parameter is given then the *_nz parameter is ignored
4064 
4065    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4066    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4067    storage requirements for this matrix.
4068 
4069    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4070    processor then it must be used on all processors that share the object for
4071    that argument.
4072 
4073    The user MUST specify either the local or global matrix dimensions
4074    (possibly both).
4075 
4076    The parallel matrix is partitioned across processors such that the
4077    first m0 rows belong to process 0, the next m1 rows belong to
4078    process 1, the next m2 rows belong to process 2, etc., where
4079    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4080    values corresponding to an [m x N] submatrix.
4081 
4082    The columns are logically partitioned with the n0 columns belonging
4083    to the 0th partition, the next n1 columns belonging to the next
4084    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4085 
4086    The DIAGONAL portion of the local submatrix on any given processor
4087    is the submatrix corresponding to the rows and columns m,n
4088    owned by the given processor, i.e. the diagonal submatrix on
4089    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4090    etc. The remaining portion of the local submatrix [m x (N-n)]
4091    constitutes the OFF-DIAGONAL portion. The example below better
4092    illustrates this concept.
4093 
4094    For a square global matrix we define each processor's diagonal portion
4095    to be its local rows and the corresponding columns (a square submatrix);
4096    each processor's off-diagonal portion encompasses the remainder of the
4097    local matrix (a rectangular submatrix).
4098 
4099    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4100 
4101    When calling this routine with a single process communicator, a matrix of
4102    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4103    type of communicator, use the construction mechanism:
4104      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4105 
4106    By default, this format uses inodes (identical nodes) when possible.
4107    We search for consecutive rows with the same nonzero structure, thereby
4108    reusing matrix information to achieve increased efficiency.
4109 
4110    Options Database Keys:
4111 +  -mat_no_inode  - Do not use inodes
4112 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4113 -  -mat_aij_oneindex - Internally use indexing starting at 1
4114         rather than 0.  Note that when calling MatSetValues(),
4115         the user still MUST index entries starting at 0!
4116 
4117 
4118    Example usage:
4119 
4120    Consider the following 8x8 matrix with 34 non-zero values, that is
4121    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4122    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4123    as follows:
4124 
4125 .vb
4126             1  2  0  |  0  3  0  |  0  4
4127     Proc0   0  5  6  |  7  0  0  |  8  0
4128             9  0 10  | 11  0  0  | 12  0
4129     -------------------------------------
4130            13  0 14  | 15 16 17  |  0  0
4131     Proc1   0 18  0  | 19 20 21  |  0  0
4132             0  0  0  | 22 23  0  | 24  0
4133     -------------------------------------
4134     Proc2  25 26 27  |  0  0 28  | 29  0
4135            30  0  0  | 31 32 33  |  0 34
4136 .ve
4137 
4138    This can be represented as a collection of submatrices as:
4139 
4140 .vb
4141       A B C
4142       D E F
4143       G H I
4144 .ve
4145 
4146    Where the submatrices A,B,C are owned by proc0, D,E,F are
4147    owned by proc1, G,H,I are owned by proc2.
4148 
4149    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4150    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4151    The 'M','N' parameters are 8,8, and have the same values on all procs.
4152 
4153    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4154    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4155    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4156    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4157    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4158    matrix, and [DF] as another SeqAIJ matrix.
4159 
4160    When d_nz, o_nz parameters are specified, d_nz storage elements are
4161    allocated for every row of the local diagonal submatrix, and o_nz
4162    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4163    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4164    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4165    In this case, the values of d_nz,o_nz are:
4166 .vb
4167      proc0 : d_nz = 2, o_nz = 2
4168      proc1 : d_nz = 3, o_nz = 2
4169      proc2 : d_nz = 1, o_nz = 4
4170 .ve
4171    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4172    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4173    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4174    34 values.
4175 
4176    When d_nnz, o_nnz parameters are specified, the storage is specified
4177    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4178    In the above case the values for d_nnz,o_nnz are:
4179 .vb
4180      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4181      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4182      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4183 .ve
4184    Here the space allocated is the sum of all the above values, i.e. 34, and
4185    hence the pre-allocation is perfect.
4186 
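   As a minimal sketch of the example above (error checking omitted), proc0 could create the
   matrix with exact per-row preallocation as:
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   The other processes make the analogous call with their own local sizes and nnz arrays.
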
4187    Level: intermediate
4188 
4189 .keywords: matrix, aij, compressed row, sparse, parallel
4190 
4191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4192           MPIAIJ, MatCreateMPIAIJWithArrays()
4193 @*/
4194 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4195 {
4196   PetscErrorCode ierr;
4197   PetscMPIInt    size;
4198 
4199   PetscFunctionBegin;
4200   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4201   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4202   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4203   if (size > 1) {
4204     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4205     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4206   } else {
4207     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4208     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4209   }
4210   PetscFunctionReturn(0);
4211 }
4212 
4213 #undef __FUNCT__
4214 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4215 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4216 {
4217   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4218 
4219   PetscFunctionBegin;
4220   *Ad     = a->A;
4221   *Ao     = a->B;
4222   *colmap = a->garray;
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 #undef __FUNCT__
4227 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4228 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4229 {
4230   PetscErrorCode ierr;
4231   PetscInt       i;
4232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4233 
4234   PetscFunctionBegin;
4235   if (coloring->ctype == IS_COLORING_GLOBAL) {
4236     ISColoringValue *allcolors,*colors;
4237     ISColoring      ocoloring;
4238 
4239     /* set coloring for diagonal portion */
4240     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4241 
4242     /* set coloring for off-diagonal portion */
4243     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4244     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4245     for (i=0; i<a->B->cmap->n; i++) {
4246       colors[i] = allcolors[a->garray[i]];
4247     }
4248     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4249     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4250     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4251     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4252   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4253     ISColoringValue *colors;
4254     PetscInt        *larray;
4255     ISColoring      ocoloring;
4256 
4257     /* set coloring for diagonal portion */
4258     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4259     for (i=0; i<a->A->cmap->n; i++) {
4260       larray[i] = i + A->cmap->rstart;
4261     }
4262     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4263     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4264     for (i=0; i<a->A->cmap->n; i++) {
4265       colors[i] = coloring->colors[larray[i]];
4266     }
4267     ierr = PetscFree(larray);CHKERRQ(ierr);
4268     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4269     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4270     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4271 
4272     /* set coloring for off-diagonal portion */
4273     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4274     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4275     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4276     for (i=0; i<a->B->cmap->n; i++) {
4277       colors[i] = coloring->colors[larray[i]];
4278     }
4279     ierr = PetscFree(larray);CHKERRQ(ierr);
4280     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4281     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4282     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4283   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4284 
4285   PetscFunctionReturn(0);
4286 }
4287 
4288 #if defined(PETSC_HAVE_ADIC)
4289 #undef __FUNCT__
4290 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4291 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4292 {
4293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4294   PetscErrorCode ierr;
4295 
4296   PetscFunctionBegin;
4297   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4298   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4299   PetscFunctionReturn(0);
4300 }
4301 #endif
4302 
4303 #undef __FUNCT__
4304 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4305 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4306 {
4307   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4308   PetscErrorCode ierr;
4309 
4310   PetscFunctionBegin;
4311   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4312   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4313   PetscFunctionReturn(0);
4314 }
4315 
4316 #undef __FUNCT__
4317 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4318 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4319 {
4320   PetscErrorCode ierr;
4321   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4322   PetscInt       *indx;
4323 
4324   PetscFunctionBegin;
4325   /* This routine will ONLY return MPIAIJ type matrix */
4326   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4327   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4328   if (n == PETSC_DECIDE){
4329     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4330   }
4331   /* Check sum(n) = N */
4332   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4333   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4334 
4335   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4336   rstart -= m;
4337 
4338   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4339   for (i=0;i<m;i++) {
4340     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4341     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4342     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4343   }
4344 
4345   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4346   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4347   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4348   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4349   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4350   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4351   PetscFunctionReturn(0);
4352 }
4353 
4354 #undef __FUNCT__
4355 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4356 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4357 {
4358   PetscErrorCode ierr;
4359   PetscInt       m,N,i,rstart,nnz,Ii;
4360   PetscInt       *indx;
4361   PetscScalar    *values;
4362 
4363   PetscFunctionBegin;
4364   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4365   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4366   for (i=0;i<m;i++) {
4367     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4368     Ii    = i + rstart;
4369     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4370     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4371   }
4372   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4373   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4374   PetscFunctionReturn(0);
4375 }
4376 
4377 #undef __FUNCT__
4378 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4379 /*@
4380       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4381                  matrices from each processor
4382 
4383     Collective on MPI_Comm
4384 
4385    Input Parameters:
4386 +    comm - the communicator the parallel matrix will live on
4387 .    inmat - the input sequential matrix (one per process)
4388 .    n - number of local columns (or PETSC_DECIDE)
4389 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4390 
4391    Output Parameter:
4392 .    outmat - the parallel matrix generated
4393 
4394     Level: advanced
4395 
4396    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4397 
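   A minimal usage sketch (Aseq denotes each process's local sequential matrix and is an
   illustrative name; error checking omitted):
.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve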
4398 @*/
4399 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4400 {
4401   PetscErrorCode ierr;
4402 
4403   PetscFunctionBegin;
4404   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4405   if (scall == MAT_INITIAL_MATRIX){
4406     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4407   }
4408   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4409   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4410   PetscFunctionReturn(0);
4411 }
4412 
4413 #undef __FUNCT__
4414 #define __FUNCT__ "MatFileSplit"
4415 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4416 {
4417   PetscErrorCode    ierr;
4418   PetscMPIInt       rank;
4419   PetscInt          m,N,i,rstart,nnz;
4420   size_t            len;
4421   const PetscInt    *indx;
4422   PetscViewer       out;
4423   char              *name;
4424   Mat               B;
4425   const PetscScalar *values;
4426 
4427   PetscFunctionBegin;
4428   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4429   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4430   /* Should this be the type of the diagonal block of A? */
4431   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4432   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4433   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4434   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4435   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4436   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4437   for (i=0;i<m;i++) {
4438     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4439     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4440     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4441   }
4442   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4443   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4444 
4445   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4446   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4447   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4448   sprintf(name,"%s.%d",outfile,rank);
4449   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4450   ierr = PetscFree(name);CHKERRQ(ierr);
4451   ierr = MatView(B,out);CHKERRQ(ierr);
4452   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4453   ierr = MatDestroy(&B);CHKERRQ(ierr);
4454   PetscFunctionReturn(0);
4455 }
4456 
4457 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4458 #undef __FUNCT__
4459 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4460 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4461 {
4462   PetscErrorCode       ierr;
4463   Mat_Merge_SeqsToMPI  *merge;
4464   PetscContainer       container;
4465 
4466   PetscFunctionBegin;
4467   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4468   if (container) {
4469     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4482     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4483     ierr = PetscFree(merge);CHKERRQ(ierr);
4484     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4485   }
4486   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
4489 
4490 #include <../src/mat/utils/freespace.h>
4491 #include <petscbt.h>
4492 
4493 #undef __FUNCT__
4494 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4495 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4496 {
4497   PetscErrorCode       ierr;
4498   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4499   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4500   PetscMPIInt          size,rank,taga,*len_s;
4501   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4502   PetscInt             proc,m;
4503   PetscInt             **buf_ri,**buf_rj;
4504   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4505   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4506   MPI_Request          *s_waits,*r_waits;
4507   MPI_Status           *status;
4508   MatScalar            *aa=a->a;
4509   MatScalar            **abuf_r,*ba_i;
4510   Mat_Merge_SeqsToMPI  *merge;
4511   PetscContainer       container;
4512 
4513   PetscFunctionBegin;
4514   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4515 
4516   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4517   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4518 
4519   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4520   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4521 
4522   bi     = merge->bi;
4523   bj     = merge->bj;
4524   buf_ri = merge->buf_ri;
4525   buf_rj = merge->buf_rj;
4526 
4527   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4528   owners = merge->rowmap->range;
4529   len_s  = merge->len_s;
4530 
4531   /* send and recv matrix values */
4532   /*-----------------------------*/
4533   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4534   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4535 
4536   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4537   for (proc=0,k=0; proc<size; proc++){
4538     if (!len_s[proc]) continue;
4539     i = owners[proc];
4540     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4541     k++;
4542   }
4543 
4544   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4545   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4546   ierr = PetscFree(status);CHKERRQ(ierr);
4547 
4548   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4549   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4550 
4551   /* insert mat values of mpimat */
4552   /*----------------------------*/
4553   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4554   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4555 
4556   for (k=0; k<merge->nrecv; k++){
4557     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4558     nrows = *(buf_ri_k[k]);
4559     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4560     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4561   }
4562 
4563   /* set values of ba */
4564   m = merge->rowmap->n;
4565   for (i=0; i<m; i++) {
4566     arow = owners[rank] + i;
4567     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4568     bnzi = bi[i+1] - bi[i];
4569     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4570 
4571     /* add local non-zero vals of this proc's seqmat into ba */
4572     anzi = ai[arow+1] - ai[arow];
4573     aj   = a->j + ai[arow];
4574     aa   = a->a + ai[arow];
4575     nextaj = 0;
4576     for (j=0; nextaj<anzi; j++){
4577       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4578         ba_i[j] += aa[nextaj++];
4579       }
4580     }
4581 
4582     /* add received vals into ba */
4583     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4584       /* i-th row */
4585       if (i == *nextrow[k]) {
4586         anzi = *(nextai[k]+1) - *nextai[k];
4587         aj   = buf_rj[k] + *(nextai[k]);
4588         aa   = abuf_r[k] + *(nextai[k]);
4589         nextaj = 0;
4590         for (j=0; nextaj<anzi; j++){
4591           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4592             ba_i[j] += aa[nextaj++];
4593           }
4594         }
4595         nextrow[k]++; nextai[k]++;
4596       }
4597     }
4598     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4599   }
4600   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4601   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602 
4603   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4604   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4605   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4606   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4607   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4608   PetscFunctionReturn(0);
4609 }
4610 
4611 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4612 
4613 #undef __FUNCT__
4614 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4615 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4616 {
4617   PetscErrorCode       ierr;
4618   Mat                  B_mpi;
4619   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4620   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4621   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4622   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4623   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4624   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4625   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4626   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4627   MPI_Status           *status;
4628   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4629   PetscBT              lnkbt;
4630   Mat_Merge_SeqsToMPI  *merge;
4631   PetscContainer       container;
4632 
4633   PetscFunctionBegin;
4634   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4635 
4636   /* make sure it is a PETSc comm */
4637   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4638   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4639   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4640 
4641   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4642   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4643 
4644   /* determine row ownership */
4645   /*---------------------------------------------------------*/
4646   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4647   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4648   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4649   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4650   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4651   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4652   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4653 
4654   m      = merge->rowmap->n;
4655   M      = merge->rowmap->N;
4656   owners = merge->rowmap->range;
4657 
4658   /* determine the number of messages to send, their lengths */
4659   /*---------------------------------------------------------*/
4660   len_s  = merge->len_s;
4661 
4662   len = 0;  /* length of buf_si[] */
4663   merge->nsend = 0;
4664   for (proc=0; proc<size; proc++){
4665     len_si[proc] = 0;
4666     if (proc == rank){
4667       len_s[proc] = 0;
4668     } else {
4669       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4670       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4671     }
4672     if (len_s[proc]) {
4673       merge->nsend++;
4674       nrows = 0;
4675       for (i=owners[proc]; i<owners[proc+1]; i++){
4676         if (ai[i+1] > ai[i]) nrows++;
4677       }
4678       len_si[proc] = 2*(nrows+1);
4679       len += len_si[proc];
4680     }
4681   }
4682 
4683   /* determine the number and length of messages to receive for ij-structure */
4684   /*-------------------------------------------------------------------------*/
4685   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4686   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4687 
4688   /* post the Irecv of j-structure */
4689   /*-------------------------------*/
4690   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4691   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4692 
4693   /* post the Isend of j-structure */
4694   /*--------------------------------*/
4695   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4696 
4697   for (proc=0, k=0; proc<size; proc++){
4698     if (!len_s[proc]) continue;
4699     i = owners[proc];
4700     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4701     k++;
4702   }
4703 
4704   /* receives and sends of j-structure are complete */
4705   /*------------------------------------------------*/
4706   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4707   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4708 
4709   /* send and recv i-structure */
4710   /*---------------------------*/
4711   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4712   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4713 
4714   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4715   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4716   for (proc=0,k=0; proc<size; proc++){
4717     if (!len_s[proc]) continue;
4718     /* form outgoing message for i-structure:
4719          buf_si[0]:                 nrows to be sent
4720                [1:nrows]:           row index (global)
4721                [nrows+1:2*nrows+1]: i-structure index
4722     */
4723     /*-------------------------------------------*/
4724     nrows = len_si[proc]/2 - 1;
4725     buf_si_i    = buf_si + nrows+1;
4726     buf_si[0]   = nrows;
4727     buf_si_i[0] = 0;
4728     nrows = 0;
4729     for (i=owners[proc]; i<owners[proc+1]; i++){
4730       anzi = ai[i+1] - ai[i];
4731       if (anzi) {
4732         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4733         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4734         nrows++;
4735       }
4736     }
4737     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4738     k++;
4739     buf_si += len_si[proc];
4740   }
4741 
4742   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4743   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4744 
4745   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4746   for (i=0; i<merge->nrecv; i++){
4747     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4748   }
4749 
4750   ierr = PetscFree(len_si);CHKERRQ(ierr);
4751   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4752   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4753   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4754   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4755   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4756   ierr = PetscFree(status);CHKERRQ(ierr);
4757 
4758   /* compute a local seq matrix in each processor */
4759   /*----------------------------------------------*/
4760   /* allocate bi array and free space for accumulating nonzero column info */
4761   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4762   bi[0] = 0;
4763 
4764   /* create and initialize a linked list */
4765   nlnk = N+1;
4766   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4767 
4768   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4769   len = 0;
4770   len  = ai[owners[rank+1]] - ai[owners[rank]];
4771   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4772   current_space = free_space;
4773 
4774   /* determine symbolic info for each local row */
4775   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4776 
4777   for (k=0; k<merge->nrecv; k++){
4778     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4779     nrows = *buf_ri_k[k];
4780     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4781     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4782   }
4783 
4784   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4785   len = 0;
4786   for (i=0;i<m;i++) {
4787     bnzi   = 0;
4788     /* add local non-zero cols of this proc's seqmat into lnk */
4789     arow   = owners[rank] + i;
4790     anzi   = ai[arow+1] - ai[arow];
4791     aj     = a->j + ai[arow];
4792     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4793     bnzi += nlnk;
4794     /* add received col data into lnk */
4795     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4796       if (i == *nextrow[k]) { /* i-th row */
4797         anzi = *(nextai[k]+1) - *nextai[k];
4798         aj   = buf_rj[k] + *nextai[k];
4799         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4800         bnzi += nlnk;
4801         nextrow[k]++; nextai[k]++;
4802       }
4803     }
4804     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4805 
4806     /* if free space is not available, make more free space */
4807     if (current_space->local_remaining<bnzi) {
4808       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4809       nspacedouble++;
4810     }
4811     /* copy data into free space, then initialize lnk */
4812     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4813     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4814 
4815     current_space->array           += bnzi;
4816     current_space->local_used      += bnzi;
4817     current_space->local_remaining -= bnzi;
4818 
4819     bi[i+1] = bi[i] + bnzi;
4820   }
4821 
4822   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4823 
4824   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4825   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4826   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4827 
4828   /* create symbolic parallel matrix B_mpi */
4829   /*---------------------------------------*/
4830   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4831   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4832   if (n==PETSC_DECIDE) {
4833     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4834   } else {
4835     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4836   }
4837   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4838   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4839   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4840   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4841   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4842 
4843   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4844   B_mpi->assembled     = PETSC_FALSE;
4845   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4846   merge->bi            = bi;
4847   merge->bj            = bj;
4848   merge->buf_ri        = buf_ri;
4849   merge->buf_rj        = buf_rj;
4850   merge->coi           = PETSC_NULL;
4851   merge->coj           = PETSC_NULL;
4852   merge->owners_co     = PETSC_NULL;
4853 
4854   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4855 
4856   /* attach the supporting struct to B_mpi for reuse */
4857   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4858   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4859   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4860   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4861   *mpimat = B_mpi;
4862 
4863   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4864   PetscFunctionReturn(0);
4865 }
4866 
4867 #undef __FUNCT__
4868 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4869 /*@C
4870       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4871                  matrices from each processor
4872 
4873     Collective on MPI_Comm
4874 
4875    Input Parameters:
4876 +    comm - the communicator the parallel matrix will live on
4877 .    seqmat - the input sequential matrix (one per process)
4878 .    m - number of local rows (or PETSC_DECIDE)
4879 .    n - number of local columns (or PETSC_DECIDE)
4880 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4881 
4882    Output Parameter:
4883 .    mpimat - the parallel matrix generated
4884 
4885     Level: advanced
4886 
4887    Notes:
4888      The dimensions of the sequential matrix in each processor MUST be the same.
4889      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4890      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
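
   A minimal usage sketch (Aseq denotes each process's local sequential matrix and is an
   illustrative name; error checking omitted):
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve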
4891 @*/
4892 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4893 {
4894   PetscErrorCode   ierr;
4895   PetscMPIInt     size;
4896 
4897   PetscFunctionBegin;
4898   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4899   if (size == 1){
4900      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4901      if (scall == MAT_INITIAL_MATRIX){
4902        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4903      } else {
4904        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4905      }
4906      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4907      PetscFunctionReturn(0);
4908   }
4909   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910   if (scall == MAT_INITIAL_MATRIX){
4911     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4912   }
4913   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4914   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4915   PetscFunctionReturn(0);
4916 }
4917 
4918 #undef __FUNCT__
4919 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4920 /*@
4921      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4922           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4923           with MatGetSize()
4924 
4925     Not Collective
4926 
4927    Input Parameters:
4928 +    A - the matrix
4929 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4930 
4931    Output Parameter:
4932 .    A_loc - the local sequential matrix generated
4933 
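   Notes:
   A minimal usage sketch (assuming the caller destroys A_loc when done; error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
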
4934     Level: developer
4935 
4936 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4937 
4938 @*/
4939 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4940 {
4941   PetscErrorCode  ierr;
4942   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4943   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4944   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4945   MatScalar       *aa=a->a,*ba=b->a,*cam;
4946   PetscScalar     *ca;
4947   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4948   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4949   PetscBool       match;
4950 
4951   PetscFunctionBegin;
4952   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4953   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4954   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4955   if (scall == MAT_INITIAL_MATRIX){
4956     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4957     ci[0] = 0;
4958     for (i=0; i<am; i++){
4959       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4960     }
4961     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4962     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4963     k = 0;
4964     for (i=0; i<am; i++) {
4965       ncols_o = bi[i+1] - bi[i];
4966       ncols_d = ai[i+1] - ai[i];
4967       /* off-diagonal portion of A */
4968       for (jo=0; jo<ncols_o; jo++) {
4969         col = cmap[*bj];
4970         if (col >= cstart) break;
4971         cj[k]   = col; bj++;
4972         ca[k++] = *ba++;
4973       }
4974       /* diagonal portion of A */
4975       for (j=0; j<ncols_d; j++) {
4976         cj[k]   = cstart + *aj++;
4977         ca[k++] = *aa++;
4978       }
4979       /* off-diagonal portion of A */
4980       for (j=jo; j<ncols_o; j++) {
4981         cj[k]   = cmap[*bj++];
4982         ca[k++] = *ba++;
4983       }
4984     }
4985     /* put together the new matrix */
4986     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4987     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4988     /* Since these are PETSc arrays, change flags to free them as necessary. */
4989     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4990     mat->free_a  = PETSC_TRUE;
4991     mat->free_ij = PETSC_TRUE;
4992     mat->nonew   = 0;
4993   } else if (scall == MAT_REUSE_MATRIX){
4994     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4995     ci = mat->i; cj = mat->j; cam = mat->a;
4996     for (i=0; i<am; i++) {
4997       /* off-diagonal portion of A */
4998       ncols_o = bi[i+1] - bi[i];
4999       for (jo=0; jo<ncols_o; jo++) {
5000         col = cmap[*bj];
5001         if (col >= cstart) break;
5002         *cam++ = *ba++; bj++;
5003       }
5004       /* diagonal portion of A */
5005       ncols_d = ai[i+1] - ai[i];
5006       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5007       /* off-diagonal portion of A */
5008       for (j=jo; j<ncols_o; j++) {
5009         *cam++ = *ba++; bj++;
5010       }
5011     }
5012   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5013   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5019 /*@C
5020      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5021 
5022     Not Collective
5023 
5024    Input Parameters:
5025 +    A - the matrix
5026 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5027 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5028 
5029    Output Parameter:
5030 .    A_loc - the local sequential matrix generated
5031 
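   Notes:
   A minimal usage sketch taking all local rows and all nonzero columns (error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
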
5032     Level: developer
5033 
5034 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5035 
5036 @*/
5037 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5038 {
5039   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5040   PetscErrorCode    ierr;
5041   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5042   IS                isrowa,iscola;
5043   Mat               *aloc;
5044   PetscBool       match;
5045 
5046   PetscFunctionBegin;
5047   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5048   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5049   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5050   if (!row){
5051     start = A->rmap->rstart; end = A->rmap->rend;
5052     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5053   } else {
5054     isrowa = *row;
5055   }
5056   if (!col){
5057     start = A->cmap->rstart;
5058     cmap  = a->garray;
5059     nzA   = a->A->cmap->n;
5060     nzB   = a->B->cmap->n;
5061     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5062     ncols = 0;
5063     for (i=0; i<nzB; i++) {
5064       if (cmap[i] < start) idx[ncols++] = cmap[i];
5065       else break;
5066     }
5067     imark = i;
5068     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5069     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5070     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5071   } else {
5072     iscola = *col;
5073   }
5074   if (scall != MAT_INITIAL_MATRIX){
5075     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5076     aloc[0] = *A_loc;
5077   }
5078   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5079   *A_loc = aloc[0];
5080   ierr = PetscFree(aloc);CHKERRQ(ierr);
5081   if (!row){
5082     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5083   }
5084   if (!col){
5085     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5086   }
5087   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5088   PetscFunctionReturn(0);
5089 }
5090 
5091 #undef __FUNCT__
5092 #define __FUNCT__ "MatGetBrowsOfAcols"
5093 /*@C
5094     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5095 
5096     Collective on Mat
5097 
5098    Input Parameters:
5099 +    A,B - the matrices in mpiaij format
5100 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5101 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5102 
5103    Output Parameter:
5104 +    rowb, colb - index sets of rows and columns of B to extract
5105 -    B_seq - the sequential matrix generated
5106 
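   Notes:
   A minimal usage sketch for an initial call (rowb and colb are returned so that a later
   MAT_REUSE_MATRIX call can pass them back in; error checking omitted):
.vb
     IS  rowb,colb;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
.ve
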
5107     Level: developer
5108 
5109 @*/
5110 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5111 {
5112   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5113   PetscErrorCode    ierr;
5114   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5115   IS                isrowb,iscolb;
5116   Mat               *bseq=PETSC_NULL;
5117 
5118   PetscFunctionBegin;
5119   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5120     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5121   }
5122   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5123 
5124   if (scall == MAT_INITIAL_MATRIX){
5125     start = A->cmap->rstart;
5126     cmap  = a->garray;
5127     nzA   = a->A->cmap->n;
5128     nzB   = a->B->cmap->n;
5129     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5130     ncols = 0;
5131     for (i=0; i<nzB; i++) {  /* row < local row index */
5132       if (cmap[i] < start) idx[ncols++] = cmap[i];
5133       else break;
5134     }
5135     imark = i;
5136     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5137     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5138     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5139     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5140   } else {
5141     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5142     isrowb = *rowb; iscolb = *colb;
5143     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5144     bseq[0] = *B_seq;
5145   }
5146   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5147   *B_seq = bseq[0];
5148   ierr = PetscFree(bseq);CHKERRQ(ierr);
5149   if (!rowb){
5150     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5151   } else {
5152     *rowb = isrowb;
5153   }
5154   if (!colb){
5155     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5156   } else {
5157     *colb = iscolb;
5158   }
5159   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5160   PetscFunctionReturn(0);
5161 }
5162 
5163 #undef __FUNCT__
5164 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5165 /*
5166     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5167     of the OFF-DIAGONAL portion of local A
5168 
5169     Collective on Mat
5170 
5171    Input Parameters:
5172 +    A,B - the matrices in mpiaij format
5173 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5174 
5175    Output Parameter:
5176 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5177 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5178 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5179 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5180 
5181     Level: developer
5182 
5183 */
5184 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5185 {
5186   VecScatter_MPI_General *gen_to,*gen_from;
5187   PetscErrorCode         ierr;
5188   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5189   Mat_SeqAIJ             *b_oth;
5190   VecScatter             ctx=a->Mvctx;
5191   MPI_Comm               comm=((PetscObject)ctx)->comm;
5192   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5193   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5194   PetscScalar            *rvalues,*svalues;
5195   MatScalar              *b_otha,*bufa,*bufA;
5196   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5197   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5198   MPI_Status             *sstatus,rstatus;
5199   PetscMPIInt            jj;
5200   PetscInt               *cols,sbs,rbs;
5201   PetscScalar            *vals;
5202 
5203   PetscFunctionBegin;
5204   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5205     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5206   }
5207   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5208   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5209 
5210   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5211   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5212   rvalues  = gen_from->values; /* holds the length of receiving row */
5213   svalues  = gen_to->values;   /* holds the length of sending row */
5214   nrecvs   = gen_from->n;
5215   nsends   = gen_to->n;
5216 
5217   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5218   srow     = gen_to->indices;   /* local row index to be sent */
5219   sstarts  = gen_to->starts;
5220   sprocs   = gen_to->procs;
5221   sstatus  = gen_to->sstatus;
5222   sbs      = gen_to->bs;
5223   rstarts  = gen_from->starts;
5224   rprocs   = gen_from->procs;
5225   rbs      = gen_from->bs;
5226 
5227   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5228   if (scall == MAT_INITIAL_MATRIX){
5229     /* i-array */
5230     /*---------*/
5231     /*  post receives */
5232     for (i=0; i<nrecvs; i++){
5233       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5234       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5235       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5236     }
5237 
5238     /* pack the outgoing message */
5239     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5240     sstartsj[0] = 0;  rstartsj[0] = 0;
5241     len = 0; /* total length of j or a array to be sent */
5242     k = 0;
5243     for (i=0; i<nsends; i++){
5244       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5245       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5246       for (j=0; j<nrows; j++) {
5247         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5248         for (l=0; l<sbs; l++){
5249           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5250           rowlen[j*sbs+l] = ncols;
5251           len += ncols;
5252           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5253         }
5254         k++;
5255       }
5256       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5257       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5258     }
5259     /* recvs and sends of i-array are completed */
5260     i = nrecvs;
5261     while (i--) {
5262       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5263     }
5264     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5265 
5266     /* allocate buffers for sending j and a arrays */
5267     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5268     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5269 
5270     /* create i-array of B_oth */
5271     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5272     b_othi[0] = 0;
5273     len = 0; /* total length of j or a array to be received */
5274     k = 0;
5275     for (i=0; i<nrecvs; i++){
5276       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5277       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5278       for (j=0; j<nrows; j++) {
5279         b_othi[k+1] = b_othi[k] + rowlen[j];
5280         len += rowlen[j]; k++;
5281       }
5282       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5283     }
5284 
5285     /* allocate space for j and a arrays of B_oth */
5286     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5287     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5288 
5289     /* j-array */
5290     /*---------*/
5291     /*  post receives of j-array */
5292     for (i=0; i<nrecvs; i++){
5293       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5294       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5295     }
5296 
5297     /* pack the outgoing message j-array */
5298     k = 0;
5299     for (i=0; i<nsends; i++){
5300       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5301       bufJ = bufj+sstartsj[i];
5302       for (j=0; j<nrows; j++) {
5303         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5304         for (ll=0; ll<sbs; ll++){
5305           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5306           for (l=0; l<ncols; l++){
5307             *bufJ++ = cols[l];
5308           }
5309           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5310         }
5311       }
5312       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5313     }
5314 
5315     /* recvs and sends of j-array are completed */
5316     i = nrecvs;
5317     while (i--) {
5318       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5319     }
5320     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5321   } else if (scall == MAT_REUSE_MATRIX){
5322     sstartsj = *startsj_s;
5323     rstartsj = *startsj_r;
5324     bufa     = *bufa_ptr;
5325     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5326     b_otha   = b_oth->a;
5327   } else {
5328     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");
5329   }
5330 
5331   /* a-array */
5332   /*---------*/
5333   /*  post receives of a-array */
5334   for (i=0; i<nrecvs; i++){
5335     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5336     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5337   }
5338 
5339   /* pack the outgoing message a-array */
5340   k = 0;
5341   for (i=0; i<nsends; i++){
5342     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5343     bufA = bufa+sstartsj[i];
5344     for (j=0; j<nrows; j++) {
5345       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5346       for (ll=0; ll<sbs; ll++){
5347         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5348         for (l=0; l<ncols; l++){
5349           *bufA++ = vals[l];
5350         }
5351         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5352       }
5353     }
5354     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5355   }
5356   /* recvs and sends of a-array are completed */
5357   i = nrecvs;
5358   while (i--) {
5359     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5360   }
5361   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5362   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5363 
5364   if (scall == MAT_INITIAL_MATRIX){
5365     /* put together the new matrix */
5366     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5367 
5368     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5369     /* Since these are PETSc arrays, change flags to free them as necessary. */
5370     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5371     b_oth->free_a  = PETSC_TRUE;
5372     b_oth->free_ij = PETSC_TRUE;
5373     b_oth->nonew   = 0;
5374 
5375     ierr = PetscFree(bufj);CHKERRQ(ierr);
5376     if (!startsj_s || !bufa_ptr){
5377       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5378       ierr = PetscFree(bufa);CHKERRQ(ierr);
5379     } else {
5380       *startsj_s = sstartsj;
5381       *startsj_r = rstartsj;
5382       *bufa_ptr  = bufa;
5383     }
5384   }
5385   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5386   PetscFunctionReturn(0);
5387 }
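
/*
   Added note: the i-, j-, and a-array exchanges in the routine above all follow the same
   nonblocking pattern (post all receives, pack and post all sends, then wait on both sides).
   A minimal standalone sketch of that pattern; rbuf, sbuf, rdisp, sdisp, rlen and slen are
   placeholder names for this sketch, not variables from this file:

     for (i=0; i<nrecvs; i++) {ierr = MPI_Irecv(rbuf+rdisp[i],rlen[i],MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);}
     for (i=0; i<nsends; i++) {ierr = MPI_Isend(sbuf+sdisp[i],slen[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);}
     i = nrecvs;
     while (i--) {ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);}
     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
*/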
5388 
5389 #undef __FUNCT__
5390 #define __FUNCT__ "MatGetCommunicationStructs"
5391 /*@C
5392   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5393 
5394   Not Collective
5395 
5396   Input Parameter:
5397 . A - The matrix in mpiaij format
5398 
5399   Output Parameters:
5400 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5401 . colmap - A map from global column index to local index into lvec
5402 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5403 
5404   Level: developer
5405 
5406 @*/
5407 #if defined (PETSC_USE_CTABLE)
5408 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5409 #else
5410 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5411 #endif
5412 {
5413   Mat_MPIAIJ *a;
5414 
5415   PetscFunctionBegin;
5416   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5417   PetscValidPointer(lvec, 2);
5418   PetscValidPointer(colmap, 3);
5419   PetscValidPointer(multScatter, 4);
5420   a = (Mat_MPIAIJ *) A->data;
5421   if (lvec) *lvec = a->lvec;
5422   if (colmap) *colmap = a->colmap;
5423   if (multScatter) *multScatter = a->Mvctx;
5424   PetscFunctionReturn(0);
5425 }
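
/*
   Added usage sketch (not part of the library source): querying the communication structures
   of an assembled MATMPIAIJ matrix A.  Variable names here are illustrative only; under
   PETSC_USE_CTABLE the colmap argument would be a PetscTable* instead of a PetscInt**.

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   lvec then aliases the internal work vector that receives off-process entries of x in
   y = A*x; the caller must not destroy these objects.
*/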
5426 
5427 EXTERN_C_BEGIN
5428 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5429 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5430 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5431 EXTERN_C_END
5432 
5433 #undef __FUNCT__
5434 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5435 /*
5436     Computes C = A*B as (B'*A')': since A is dense and B is AIJ, the product is formed through the existing AIJ*Dense kernel using the identity (B'*A')' = A*B.
5437 
5438                n                       p                          p
5439         (              )       (              )         (                  )
5440       m (      A       )  *  n (       B      )   =   m (         C        )
5441         (              )       (              )         (                  )
5442 
5443 */
5444 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5445 {
5446   PetscErrorCode     ierr;
5447   Mat                At,Bt,Ct;
5448 
5449   PetscFunctionBegin;
5450   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5451   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5452   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5453   ierr = MatDestroy(&At);CHKERRQ(ierr);
5454   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5455   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5456   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5457   PetscFunctionReturn(0);
5458 }
5459 
5460 #undef __FUNCT__
5461 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5462 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5463 {
5464   PetscErrorCode ierr;
5465   PetscInt       m=A->rmap->n,n=B->cmap->n;
5466   Mat            Cmat;
5467 
5468   PetscFunctionBegin;
5469   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5470   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5471   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5472   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5473   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5474   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5475   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5476   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5477   *C   = Cmat;
5478   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5479   PetscFunctionReturn(0);
5480 }
5481 
5482 /* ----------------------------------------------------------------*/
5483 #undef __FUNCT__
5484 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5485 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5486 {
5487   PetscErrorCode ierr;
5488 
5489   PetscFunctionBegin;
5490   if (scall == MAT_INITIAL_MATRIX){
5491     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5492   }
5493   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5494   PetscFunctionReturn(0);
5495 }
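
/*
   Added usage sketch (illustrative only; A, B, C are placeholder names): with the routines
   above registered, a user forms C = A*B for a MATMPIDENSE A and a MATMPIAIJ B through the
   generic interface, which is expected to dispatch to MatMatMult_MPIDense_MPIAIJ():

     Mat A,B,C;
     (create and assemble A as mpidense and B as mpiaij with compatible sizes)
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call reuses the symbolic product created by the first call, matching
   the branch in MatMatMult_MPIDense_MPIAIJ() above.
*/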
5496 
5497 EXTERN_C_BEGIN
5498 #if defined(PETSC_HAVE_MUMPS)
5499 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5500 #endif
5501 #if defined(PETSC_HAVE_PASTIX)
5502 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5503 #endif
5504 #if defined(PETSC_HAVE_SUPERLU_DIST)
5505 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5506 #endif
5507 #if defined(PETSC_HAVE_SPOOLES)
5508 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5509 #endif
5510 EXTERN_C_END
5511 
5512 /*MC
5513    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5514 
5515    Options Database Keys:
5516 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5517 
5518   Level: beginner
5519 
5520 .seealso: MatCreateAIJ()
5521 M*/
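
/*
   Added creation sketch for MATMPIAIJ (illustrative only; the global sizes M, N and the
   preallocation counts are placeholders chosen for the example):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     (set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd())
*/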
5522 
5523 EXTERN_C_BEGIN
5524 #undef __FUNCT__
5525 #define __FUNCT__ "MatCreate_MPIAIJ"
5526 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5527 {
5528   Mat_MPIAIJ     *b;
5529   PetscErrorCode ierr;
5530   PetscMPIInt    size;
5531 
5532   PetscFunctionBegin;
5533   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5534 
5535   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5536   B->data         = (void*)b;
5537   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5538   B->assembled    = PETSC_FALSE;
5539 
5540   B->insertmode   = NOT_SET_VALUES;
5541   b->size         = size;
5542   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5543 
5544   /* build cache for off array entries formed */
5545   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5546   b->donotstash  = PETSC_FALSE;
5547   b->colmap      = 0;
5548   b->garray      = 0;
5549   b->roworiented = PETSC_TRUE;
5550 
5551   /* stuff used for matrix vector multiply */
5552   b->lvec      = PETSC_NULL;
5553   b->Mvctx     = PETSC_NULL;
5554 
5555   /* stuff for MatGetRow() */
5556   b->rowindices   = 0;
5557   b->rowvalues    = 0;
5558   b->getrowactive = PETSC_FALSE;
5559 
5560 #if defined(PETSC_HAVE_SPOOLES)
5561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5562                                      "MatGetFactor_mpiaij_spooles",
5563                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5564 #endif
5565 #if defined(PETSC_HAVE_MUMPS)
5566   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5567                                      "MatGetFactor_aij_mumps",
5568                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5569 #endif
5570 #if defined(PETSC_HAVE_PASTIX)
5571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5572 					   "MatGetFactor_mpiaij_pastix",
5573 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5574 #endif
5575 #if defined(PETSC_HAVE_SUPERLU_DIST)
5576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5577                                      "MatGetFactor_mpiaij_superlu_dist",
5578                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5579 #endif
5580   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5581                                      "MatStoreValues_MPIAIJ",
5582                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5583   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5584                                      "MatRetrieveValues_MPIAIJ",
5585                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5586   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5587 				     "MatGetDiagonalBlock_MPIAIJ",
5588                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5590 				     "MatIsTranspose_MPIAIJ",
5591 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5593 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5594 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5596 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5597 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5599 				     "MatDiagonalScaleLocal_MPIAIJ",
5600 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5602                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5603                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5605                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5606                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5608                                      "MatConvert_MPIAIJ_MPISBAIJ",
5609                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5611                                      "MatMatMult_MPIDense_MPIAIJ",
5612                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5614                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5615                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5617                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5618                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5620   PetscFunctionReturn(0);
5621 }
5622 EXTERN_C_END
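
/*
   Added note (illustrative sketch): the PetscObjectComposeFunctionDynamic() calls above attach
   type-specific implementations by name; other parts of PETSc retrieve and invoke them roughly
   as follows.  The conversion shown and the variable names are only an example:

     PetscErrorCode (*conv)(Mat,const MatType,MatReuse,Mat*) = PETSC_NULL;
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",(void (**)(void))&conv);CHKERRQ(ierr);
     if (conv) {ierr = (*conv)(B,MATMPISBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);}
*/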
5623 
5624 #undef __FUNCT__
5625 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5626 /*@
5627      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5628          and "off-diagonal" part of the matrix in CSR format.
5629 
5630    Collective on MPI_Comm
5631 
5632    Input Parameters:
5633 +  comm - MPI communicator
5634 .  m - number of local rows (Cannot be PETSC_DECIDE)
5635 .  n - number of local columns; this should be the same as the local size used in creating the
5636        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
5637        For square matrices n is almost always m.
5638 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5639 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5640 .   i - row indices for "diagonal" portion of matrix
5641 .   j - column indices for "diagonal" portion of matrix
5642 .   a - matrix values for "diagonal" portion of matrix
5643 .   oi - row indices for "off-diagonal" portion of matrix
5644 .   oj - column indices for "off-diagonal" portion of matrix
5645 -   oa - matrix values for "off-diagonal" portion of matrix
5646 
5647    Output Parameter:
5648 .   mat - the matrix
5649 
5650    Level: advanced
5651 
5652    Notes:
5653        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5654        must free the arrays once the matrix has been destroyed and not before.
5655 
5656        The i, j, oi, and oj indices are zero based
5657 
5658        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5659 
5660        This sets local rows and cannot be used to set off-processor values.
5661 
5662        You cannot later use MatSetValues() to change values in this matrix.
5663 
5664 .keywords: matrix, aij, compressed row, sparse, parallel
5665 
5666 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5667           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5668 @*/
5669 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5670 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5671 {
5672   PetscErrorCode ierr;
5673   Mat_MPIAIJ     *maij;
5674 
5675   PetscFunctionBegin;
5676   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5677   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5678   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5679   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5680   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5681   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5682   maij = (Mat_MPIAIJ*) (*mat)->data;
5683   maij->donotstash     = PETSC_TRUE;
5684   (*mat)->preallocated = PETSC_TRUE;
5685 
5686   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5687   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5688 
5689   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5690   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5691 
5692   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696 
5697   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   PetscFunctionReturn(0);
5700 }
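
/*
   Added usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only): assembling the
   2x2 matrix [1 2; 3 4] on two processes, one row and one column per process.  The "diagonal"
   j indices are local to the diagonal block, the "off-diagonal" oj indices are global column
   indices; in real code the six arrays must outlive the matrix.

     PetscInt    i[2] = {0,1}, oi[2] = {0,1}, j[1], oj[1];
     PetscScalar a[1], oa[1];
     Mat         A;
     if (!rank) {j[0] = 0; a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;}
     else       {j[0] = 0; a[0] = 4.0; oj[0] = 0; oa[0] = 3.0;}
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/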
5701 
5702 /*
5703     Special version for direct calls from Fortran
5704 */
5705 #include <petsc-private/fortranimpl.h>
5706 
5707 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5708 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5709 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5710 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5711 #endif
5712 
5713 /* Redefine these macros so that they can be used in a function returning void */
5714 #undef CHKERRQ
5715 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5716 #undef SETERRQ2
5717 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5718 #undef SETERRQ3
5719 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5720 #undef SETERRQ
5721 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
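
/*
   Added note: the Fortran-callable wrapper below returns void, so the usual CHKERRQ()
   expansion, which does "if (ierr) return PetscError(...)", cannot be used there.  The
   redefinitions above replace the early return with CHKERRABORT(), which (schematically)
   reports the error and aborts instead:

     CHKERRQ(ierr);   now expands roughly to   if (ierr) {PetscError(...); MPI_Abort(PETSC_COMM_WORLD,ierr);}
*/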
5722 
5723 EXTERN_C_BEGIN
5724 #undef __FUNCT__
5725 #define __FUNCT__ "matsetvaluesmpiaij_"
5726 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5727 {
5728   Mat             mat = *mmat;
5729   PetscInt        m = *mm, n = *mn;
5730   InsertMode      addv = *maddv;
5731   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5732   PetscScalar     value;
5733   PetscErrorCode  ierr;
5734 
5735   MatCheckPreallocated(mat,1);
5736   if (mat->insertmode == NOT_SET_VALUES) {
5737     mat->insertmode = addv;
5738   }
5739 #if defined(PETSC_USE_DEBUG)
5740   else if (mat->insertmode != addv) {
5741     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5742   }
5743 #endif
5744   {
5745   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5746   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5747   PetscBool       roworiented = aij->roworiented;
5748 
5749   /* Some Variables required in the macro */
5750   Mat             A = aij->A;
5751   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5752   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5753   MatScalar       *aa = a->a;
5754   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5755   Mat             B = aij->B;
5756   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5757   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5758   MatScalar       *ba = b->a;
5759 
5760   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5761   PetscInt        nonew = a->nonew;
5762   MatScalar       *ap1,*ap2;
5763 
5764   PetscFunctionBegin;
5765   for (i=0; i<m; i++) {
5766     if (im[i] < 0) continue;
5767 #if defined(PETSC_USE_DEBUG)
5768     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5769 #endif
5770     if (im[i] >= rstart && im[i] < rend) {
5771       row      = im[i] - rstart;
5772       lastcol1 = -1;
5773       rp1      = aj + ai[row];
5774       ap1      = aa + ai[row];
5775       rmax1    = aimax[row];
5776       nrow1    = ailen[row];
5777       low1     = 0;
5778       high1    = nrow1;
5779       lastcol2 = -1;
5780       rp2      = bj + bi[row];
5781       ap2      = ba + bi[row];
5782       rmax2    = bimax[row];
5783       nrow2    = bilen[row];
5784       low2     = 0;
5785       high2    = nrow2;
5786 
5787       for (j=0; j<n; j++) {
5788         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5789         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5790         if (in[j] >= cstart && in[j] < cend){
5791           col = in[j] - cstart;
5792           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5793         } else if (in[j] < 0) continue;
5794 #if defined(PETSC_USE_DEBUG)
5795         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5796 #endif
5797         else {
5798           if (mat->was_assembled) {
5799             if (!aij->colmap) {
5800               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5801             }
5802 #if defined (PETSC_USE_CTABLE)
5803             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5804 	    col--;
5805 #else
5806             col = aij->colmap[in[j]] - 1;
5807 #endif
5808             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5809               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5810               col =  in[j];
5811               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5812               B = aij->B;
5813               b = (Mat_SeqAIJ*)B->data;
5814               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5815               rp2      = bj + bi[row];
5816               ap2      = ba + bi[row];
5817               rmax2    = bimax[row];
5818               nrow2    = bilen[row];
5819               low2     = 0;
5820               high2    = nrow2;
5821               bm       = aij->B->rmap->n;
5822               ba = b->a;
5823             }
5824           } else col = in[j];
5825           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5826         }
5827       }
5828     } else {
5829       if (!aij->donotstash) {
5830         if (roworiented) {
5831           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5832         } else {
5833           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5834         }
5835       }
5836     }
5837   }}
5838   PetscFunctionReturnVoid();
5839 }
5840 EXTERN_C_END
5841 
5842