xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 20ee4afdd172069d8eaf9f15c8f8db143ef17469)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine number diagonal and off-diagonal counts */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine number diagonal and off-diagonal counts */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it it is not scalable (each processor
331 has an order N integer array but is fast to acess.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselfs, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal allocating
843      new memory, thus calling lots of mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added in yy until the next line, */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195    PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302   FILE              *file;
1303 
1304   PetscFunctionBegin;
1305   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1306   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1307   nz   = A->nz + B->nz;
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) {
1319       rlen = PetscMax(rlen,range[i+1] - range[i]);
1320     }
1321   } else {
1322     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1323     rlen = mat->rmap->n;
1324   }
1325 
1326   /* load up the local row counts */
1327   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1328   for (i=0; i<mat->rmap->n; i++) {
1329     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1330   }
1331 
1332   /* store the row lengths to the file */
1333   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1334   if (!rank) {
1335     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1338       rlen = range[i+1] - range[i];
1339       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1345     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1347   }
1348   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1349 
1350   /* load up the local column indices */
1351   nzmax = nz; /* )th processor needs space a largest processor needs */
1352   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1353   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1354   cnt  = 0;
1355   for (i=0; i<mat->rmap->n; i++) {
1356     for (j=B->i[i]; j<B->i[i+1]; j++) {
1357       if ( (col = garray[B->j[j]]) > cstart) break;
1358       column_indices[cnt++] = col;
1359     }
1360     for (k=A->i[i]; k<A->i[i+1]; k++) {
1361       column_indices[cnt++] = A->j[k] + cstart;
1362     }
1363     for (; j<B->i[i+1]; j++) {
1364       column_indices[cnt++] = garray[B->j[j]];
1365     }
1366   }
1367   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368 
1369   /* store the column indices to the file */
1370    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1371   if (!rank) {
1372     MPI_Status status;
1373     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     for (i=1; i<size; i++) {
1375       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1376       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1377       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1378       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1382   } else {
1383     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1384     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1386     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1387   }
1388   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1389 
1390   /* load up the local column values */
1391   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1392   cnt  = 0;
1393   for (i=0; i<mat->rmap->n; i++) {
1394     for (j=B->i[i]; j<B->i[i+1]; j++) {
1395       if ( garray[B->j[j]] > cstart) break;
1396       column_values[cnt++] = B->a[j];
1397     }
1398     for (k=A->i[i]; k<A->i[i+1]; k++) {
1399       column_values[cnt++] = A->a[k];
1400     }
1401     for (; j<B->i[i+1]; j++) {
1402       column_values[cnt++] = B->a[j];
1403     }
1404   }
1405   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406 
1407   /* store the column values to the file */
1408    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1409   if (!rank) {
1410     MPI_Status status;
1411     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     for (i=1; i<size; i++) {
1413        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1414       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1415       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1416       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1420   } else {
1421     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1422     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1424     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1425   }
1426   ierr = PetscFree(column_values);CHKERRQ(ierr);
1427 
1428   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1429   if (file) {
1430     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1439 {
1440   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1441   PetscErrorCode    ierr;
1442   PetscMPIInt       rank = aij->rank,size = aij->size;
1443   PetscBool         isdraw,iascii,isbinary;
1444   PetscViewer       sviewer;
1445   PetscViewerFormat format;
1446 
1447   PetscFunctionBegin;
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1451   if (iascii) {
1452     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1453     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1454       MatInfo    info;
1455       PetscBool  inodes;
1456 
1457       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1458       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1461       if (!inodes) {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1463 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1464       } else {
1465         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1466 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1467       }
1468       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1472       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1475       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1476       PetscFunctionReturn(0);
1477     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1478       PetscInt   inodecount,inodelimit,*inodes;
1479       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1480       if (inodes) {
1481         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1482       } else {
1483         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1484       }
1485       PetscFunctionReturn(0);
1486     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1487       PetscFunctionReturn(0);
1488     }
1489   } else if (isbinary) {
1490     if (size == 1) {
1491       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1492       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1493     } else {
1494       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1495     }
1496     PetscFunctionReturn(0);
1497   } else if (isdraw) {
1498     PetscDraw  draw;
1499     PetscBool  isnull;
1500     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1501     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1502   }
1503 
1504   if (size == 1) {
1505     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1506     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1507   } else {
1508     /* assemble the entire matrix onto first processor. */
1509     Mat         A;
1510     Mat_SeqAIJ  *Aloc;
1511     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1512     MatScalar   *a;
1513 
1514     if (mat->rmap->N > 1024) {
1515       PetscBool  flg = PETSC_FALSE;
1516 
1517       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1518       if (!flg) {
1519         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1520       }
1521     }
1522 
1523     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1524     if (!rank) {
1525       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1526     } else {
1527       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1528     }
1529     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1530     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1531     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1532     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1533     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1534 
1535     /* copy over the A part */
1536     Aloc = (Mat_SeqAIJ*)aij->A->data;
1537     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row = mat->rmap->rstart;
1539     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1543     }
1544     aj = Aloc->j;
1545     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1546 
1547     /* copy over the B part */
1548     Aloc = (Mat_SeqAIJ*)aij->B->data;
1549     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550     row  = mat->rmap->rstart;
1551     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552     ct   = cols;
1553     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1554     for (i=0; i<m; i++) {
1555       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1556       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1557     }
1558     ierr = PetscFree(ct);CHKERRQ(ierr);
1559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1561     /*
1562        Everyone has to call to draw the matrix since the graphics waits are
1563        synchronized across all processors that share the PetscDraw object
1564     */
1565     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     if (!rank) {
1567       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1568       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1569       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1570       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1571     }
1572     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     ierr = MatDestroy(&A);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatView_MPIAIJ"
1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1581 {
1582   PetscErrorCode ierr;
1583   PetscBool      iascii,isdraw,issocket,isbinary;
1584 
1585   PetscFunctionBegin;
1586   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1587   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1588   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1589   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1590   if (iascii || isdraw || isbinary || issocket) {
1591     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatSOR_MPIAIJ"
1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   PetscErrorCode ierr;
1604   Vec            bb1 = 0;
1605   PetscBool      hasop;
1606 
1607   PetscFunctionBegin;
1608   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1610   }
1611 
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622 
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1651     if (flag & SOR_ZERO_INITIAL_GUESS) {
1652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1653       its--;
1654     }
1655     while (its--) {
1656       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658 
1659       /* update rhs: bb1 = bb - B*x */
1660       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1661       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1662 
1663       /* local sweep */
1664       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1665     }
1666   }  else if (flag & SOR_EISENSTAT) {
1667     Vec         xx1;
1668 
1669     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1671 
1672     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1674     if (!mat->diag) {
1675       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1676       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1677     }
1678     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1679     if (hasop) {
1680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1681     } else {
1682       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1683     }
1684     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1685 
1686     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1687 
1688     /* local sweep */
1689     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1691     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1692   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1693 
1694   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatPermute_MPIAIJ"
1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1701 {
1702   MPI_Comm       comm;
1703   PetscInt       first,local_rowsize,local_colsize;
1704   const PetscInt *rows;
1705   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1710   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1711   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1712   /* collect the global row permutation and invert it */
1713   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1714   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1715   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1718   /* get the local target indices */
1719   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1721   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1723   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1724   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1725   /* the column permutation is so much easier;
1726      make a local version of 'colp' and invert it */
1727   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1728   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1729   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1730   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1731   /* now we just get the submatrix */
1732   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1733   /* clean up */
1734   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1735   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscReal      isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size     = 1.0;
1750   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1752   isend[3] = info->memory;  isend[4] = info->mallocs;
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1755   isend[3] += info->memory;  isend[4] += info->mallocs;
1756   if (flag == MAT_LOCAL) {
1757     info->nz_used      = isend[0];
1758     info->nz_allocated = isend[1];
1759     info->nz_unneeded  = isend[2];
1760     info->memory       = isend[3];
1761     info->mallocs      = isend[4];
1762   } else if (flag == MAT_GLOBAL_MAX) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   } else if (flag == MAT_GLOBAL_SUM) {
1770     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1771     info->nz_used      = irecv[0];
1772     info->nz_allocated = irecv[1];
1773     info->nz_unneeded  = irecv[2];
1774     info->memory       = irecv[3];
1775     info->mallocs      = irecv[4];
1776   }
1777   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1778   info->fill_ratio_needed = 0;
1779   info->factor_mallocs    = 0;
1780 
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatSetOption_MPIAIJ"
1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1787 {
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   switch (op) {
1793   case MAT_NEW_NONZERO_LOCATIONS:
1794   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1795   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1796   case MAT_KEEP_NONZERO_PATTERN:
1797   case MAT_NEW_NONZERO_LOCATION_ERR:
1798   case MAT_USE_INODES:
1799   case MAT_IGNORE_ZERO_ENTRIES:
1800     MatCheckPreallocated(A,1);
1801     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1802     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1803     break;
1804   case MAT_ROW_ORIENTED:
1805     a->roworiented = flg;
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_NEW_DIAGONALS:
1810     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1811     break;
1812   case MAT_IGNORE_OFF_PROC_ENTRIES:
1813     a->donotstash = flg;
1814     break;
1815   case MAT_SPD:
1816     A->spd_set                         = PETSC_TRUE;
1817     A->spd                             = flg;
1818     if (flg) {
1819       A->symmetric                     = PETSC_TRUE;
1820       A->structurally_symmetric        = PETSC_TRUE;
1821       A->symmetric_set                 = PETSC_TRUE;
1822       A->structurally_symmetric_set    = PETSC_TRUE;
1823     }
1824     break;
1825   case MAT_SYMMETRIC:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1830     break;
1831   case MAT_HERMITIAN:
1832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1833     break;
1834   case MAT_SYMMETRY_ETERNAL:
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatGetRow_MPIAIJ"
1845 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1848   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1849   PetscErrorCode ierr;
1850   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1851   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1852   PetscInt       *cmap,*idx_p;
1853 
1854   PetscFunctionBegin;
1855   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1856   mat->getrowactive = PETSC_TRUE;
1857 
1858   if (!mat->rowvalues && (idx || v)) {
1859     /*
1860         allocate enough space to hold information from the longest row.
1861     */
1862     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1863     PetscInt   max = 1,tmp;
1864     for (i=0; i<matin->rmap->n; i++) {
1865       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1866       if (max < tmp) { max = tmp; }
1867     }
1868     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1869   }
1870 
1871   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1872   lrow = row - rstart;
1873 
1874   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1875   if (!v)   {pvA = 0; pvB = 0;}
1876   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1877   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   nztot = nzA + nzB;
1880 
1881   cmap  = mat->garray;
1882   if (v  || idx) {
1883     if (nztot) {
1884       /* Sort by increasing column numbers, assuming A and B already sorted */
1885       PetscInt imark = -1;
1886       if (v) {
1887         *v = v_p = mat->rowvalues;
1888         for (i=0; i<nzB; i++) {
1889           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1890           else break;
1891         }
1892         imark = i;
1893         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1894         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1895       }
1896       if (idx) {
1897         *idx = idx_p = mat->rowindices;
1898         if (imark > -1) {
1899           for (i=0; i<imark; i++) {
1900             idx_p[i] = cmap[cworkB[i]];
1901           }
1902         } else {
1903           for (i=0; i<nzB; i++) {
1904             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1905             else break;
1906           }
1907           imark = i;
1908         }
1909         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1910         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1911       }
1912     } else {
1913       if (idx) *idx = 0;
1914       if (v)   *v   = 0;
1915     }
1916   }
1917   *nz = nztot;
1918   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1919   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1925 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1926 {
1927   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1928 
1929   PetscFunctionBegin;
1930   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1931   aij->getrowactive = PETSC_FALSE;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatNorm_MPIAIJ"
1937 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1938 {
1939   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1940   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1941   PetscErrorCode ierr;
1942   PetscInt       i,j,cstart = mat->cmap->rstart;
1943   PetscReal      sum = 0.0;
1944   MatScalar      *v;
1945 
1946   PetscFunctionBegin;
1947   if (aij->size == 1) {
1948     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1949   } else {
1950     if (type == NORM_FROBENIUS) {
1951       v = amat->a;
1952       for (i=0; i<amat->nz; i++) {
1953 #if defined(PETSC_USE_COMPLEX)
1954         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1955 #else
1956         sum += (*v)*(*v); v++;
1957 #endif
1958       }
1959       v = bmat->a;
1960       for (i=0; i<bmat->nz; i++) {
1961 #if defined(PETSC_USE_COMPLEX)
1962         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963 #else
1964         sum += (*v)*(*v); v++;
1965 #endif
1966       }
1967       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1968       *norm = PetscSqrtReal(*norm);
1969     } else if (type == NORM_1) { /* max column norm */
1970       PetscReal *tmp,*tmp2;
1971       PetscInt  *jj,*garray = aij->garray;
1972       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1973       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1974       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1975       *norm = 0.0;
1976       v = amat->a; jj = amat->j;
1977       for (j=0; j<amat->nz; j++) {
1978         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1979       }
1980       v = bmat->a; jj = bmat->j;
1981       for (j=0; j<bmat->nz; j++) {
1982         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1983       }
1984       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1985       for (j=0; j<mat->cmap->N; j++) {
1986         if (tmp2[j] > *norm) *norm = tmp2[j];
1987       }
1988       ierr = PetscFree(tmp);CHKERRQ(ierr);
1989       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1990     } else if (type == NORM_INFINITY) { /* max row norm */
1991       PetscReal ntemp = 0.0;
1992       for (j=0; j<aij->A->rmap->n; j++) {
1993         v = amat->a + amat->i[j];
1994         sum = 0.0;
1995         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1996           sum += PetscAbsScalar(*v); v++;
1997         }
1998         v = bmat->a + bmat->i[j];
1999         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2000           sum += PetscAbsScalar(*v); v++;
2001         }
2002         if (sum > ntemp) ntemp = sum;
2003       }
2004       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2005     } else {
2006       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2007     }
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "MatTranspose_MPIAIJ"
2014 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2015 {
2016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2017   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2018   PetscErrorCode ierr;
2019   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2020   PetscInt       cstart=A->cmap->rstart,ncol;
2021   Mat            B;
2022   MatScalar      *array;
2023 
2024   PetscFunctionBegin;
2025   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2026 
2027   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2028   ai = Aloc->i; aj = Aloc->j;
2029   bi = Bloc->i; bj = Bloc->j;
2030   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2031     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2032     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2033     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2034     for (i=0; i<ai[ma]; i++){
2035       d_nnz[aj[i]] ++;
2036       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2037     }
2038 
2039     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2040     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2041     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2042     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2043     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2044     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2045     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2046   } else {
2047     B = *matout;
2048   }
2049 
2050   /* copy over the A part */
2051   array = Aloc->a;
2052   row = A->rmap->rstart;
2053   for (i=0; i<ma; i++) {
2054     ncol = ai[i+1]-ai[i];
2055     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2056     row++; array += ncol; aj += ncol;
2057   }
2058   aj = Aloc->j;
2059   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2060 
2061   /* copy over the B part */
2062   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2063   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2064   array = Bloc->a;
2065   row = A->rmap->rstart;
2066   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2067   cols_tmp = cols;
2068   for (i=0; i<mb; i++) {
2069     ncol = bi[i+1]-bi[i];
2070     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2071     row++; array += ncol; cols_tmp += ncol;
2072   }
2073   ierr = PetscFree(cols);CHKERRQ(ierr);
2074 
2075   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2077   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2078     *matout = B;
2079   } else {
2080     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2106   }
2107   /* scale  the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2114   }
2115 
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2121 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2122 {
2123   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2124   PetscErrorCode ierr;
2125 
2126   PetscFunctionBegin;
2127   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatEqual_MPIAIJ"
2133 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2134 {
2135   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2136   Mat            a,b,c,d;
2137   PetscBool      flg;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   a = matA->A; b = matA->B;
2142   c = matB->A; d = matB->B;
2143 
2144   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2145   if (flg) {
2146     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2147   }
2148   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatCopy_MPIAIJ"
2154 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2155 {
2156   PetscErrorCode ierr;
2157   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2158   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2159 
2160   PetscFunctionBegin;
2161   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2162   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2163     /* because of the column compression in the off-processor part of the matrix a->B,
2164        the number of columns in a->B and b->B may be different, hence we cannot call
2165        the MatCopy() directly on the two parts. If need be, we can provide a more
2166        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2167        then copying the submatrices */
2168     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2169   } else {
2170     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2171     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatSetUp_MPIAIJ"
2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2179 {
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2189 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2190 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2191 {
2192   PetscInt          i,m=Y->rmap->N;
2193   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2194   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2195   const PetscInt    *xi = x->i,*yi = y->i;
2196 
2197   PetscFunctionBegin;
2198   /* Set the number of nonzeros in the new matrix */
2199   for(i=0; i<m; i++) {
2200     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2201     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2202     nnz[i] = 0;
2203     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2204       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2205       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2206       nnz[i]++;
2207     }
2208     for (; k<nzy; k++) nnz[i]++;
2209   }
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatAXPY_MPIAIJ"
2215 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2216 {
2217   PetscErrorCode ierr;
2218   PetscInt       i;
2219   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2220   PetscBLASInt   bnz,one=1;
2221   Mat_SeqAIJ     *x,*y;
2222 
2223   PetscFunctionBegin;
2224   if (str == SAME_NONZERO_PATTERN) {
2225     PetscScalar alpha = a;
2226     x = (Mat_SeqAIJ *)xx->A->data;
2227     y = (Mat_SeqAIJ *)yy->A->data;
2228     bnz = PetscBLASIntCast(x->nz);
2229     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2230     x = (Mat_SeqAIJ *)xx->B->data;
2231     y = (Mat_SeqAIJ *)yy->B->data;
2232     bnz = PetscBLASIntCast(x->nz);
2233     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2234   } else if (str == SUBSET_NONZERO_PATTERN) {
2235     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2236 
2237     x = (Mat_SeqAIJ *)xx->B->data;
2238     y = (Mat_SeqAIJ *)yy->B->data;
2239     if (y->xtoy && y->XtoY != xx->B) {
2240       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2241       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2242     }
2243     if (!y->xtoy) { /* get xtoy */
2244       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2245       y->XtoY = xx->B;
2246       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2247     }
2248     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2249   } else {
2250     Mat B;
2251     PetscInt *nnz_d,*nnz_o;
2252     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2253     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2254     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2255     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2256     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2257     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2258     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2259     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2260     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2261     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2262     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2263     ierr = MatHeaderReplace(Y,B);
2264     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2265     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2266   }
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatConjugate_MPIAIJ"
2274 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2275 {
2276 #if defined(PETSC_USE_COMPLEX)
2277   PetscErrorCode ierr;
2278   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2282   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2283 #else
2284   PetscFunctionBegin;
2285 #endif
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatRealPart_MPIAIJ"
2291 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2298   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305 {
2306   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2311   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 #ifdef PETSC_HAVE_PBGL
2316 
2317 #include <boost/parallel/mpi/bsp_process_group.hpp>
2318 #include <boost/graph/distributed/ilu_default_graph.hpp>
2319 #include <boost/graph/distributed/ilu_0_block.hpp>
2320 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2321 #include <boost/graph/distributed/petsc/interface.hpp>
2322 #include <boost/multi_array.hpp>
2323 #include <boost/parallel/distributed_property_map->hpp>
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2327 /*
2328   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2329 */
2330 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2331 {
2332   namespace petsc = boost::distributed::petsc;
2333 
2334   namespace graph_dist = boost::graph::distributed;
2335   using boost::graph::distributed::ilu_default::process_group_type;
2336   using boost::graph::ilu_permuted;
2337 
2338   PetscBool       row_identity, col_identity;
2339   PetscContainer  c;
2340   PetscInt        m, n, M, N;
2341   PetscErrorCode  ierr;
2342 
2343   PetscFunctionBegin;
2344   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2345   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2346   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2347   if (!row_identity || !col_identity) {
2348     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2349   }
2350 
2351   process_group_type pg;
2352   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2353   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2354   lgraph_type&   level_graph = *lgraph_p;
2355   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2356 
2357   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2358   ilu_permuted(level_graph);
2359 
2360   /* put together the new matrix */
2361   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2362   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2363   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2364   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2365   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2366   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2367   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2368   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369 
2370   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2371   ierr = PetscContainerSetPointer(c, lgraph_p);
2372   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2373   ierr = PetscContainerDestroy(&c);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2379 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2380 {
2381   PetscFunctionBegin;
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatSolve_MPIAIJ"
2387 /*
2388   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2389 */
2390 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2391 {
2392   namespace graph_dist = boost::graph::distributed;
2393 
2394   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2395   lgraph_type*   lgraph_p;
2396   PetscContainer c;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2401   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2402   ierr = VecCopy(b, x);CHKERRQ(ierr);
2403 
2404   PetscScalar* array_x;
2405   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2406   PetscInt sx;
2407   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2408 
2409   PetscScalar* array_b;
2410   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2411   PetscInt sb;
2412   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2413 
2414   lgraph_type&   level_graph = *lgraph_p;
2415   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2416 
2417   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2418   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2419                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2420 
2421   typedef boost::iterator_property_map<array_ref_type::iterator,
2422                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2423   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2424                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2425 
2426   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2427 
2428   PetscFunctionReturn(0);
2429 }
2430 #endif
2431 
2432 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2433   PetscInt       nzlocal,nsends,nrecvs;
2434   PetscMPIInt    *send_rank,*recv_rank;
2435   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2436   PetscScalar    *sbuf_a,**rbuf_a;
2437   PetscErrorCode (*Destroy)(Mat);
2438 } Mat_Redundant;
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2442 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2443 {
2444   PetscErrorCode       ierr;
2445   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2446   PetscInt             i;
2447 
2448   PetscFunctionBegin;
2449   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2450   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2451   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2452   for (i=0; i<redund->nrecvs; i++){
2453     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2454     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2455   }
2456   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2457   ierr = PetscFree(redund);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatDestroy_MatRedundant"
2463 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2464 {
2465   PetscErrorCode  ierr;
2466   PetscContainer  container;
2467   Mat_Redundant   *redund=PETSC_NULL;
2468 
2469   PetscFunctionBegin;
2470   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2471   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2472   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2473   A->ops->destroy = redund->Destroy;
2474   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2475   if (A->ops->destroy) {
2476     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2483 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2484 {
2485   PetscMPIInt    rank,size;
2486   MPI_Comm       comm=((PetscObject)mat)->comm;
2487   PetscErrorCode ierr;
2488   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2489   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2490   PetscInt       *rowrange=mat->rmap->range;
2491   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2492   Mat            A=aij->A,B=aij->B,C=*matredundant;
2493   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2494   PetscScalar    *sbuf_a;
2495   PetscInt       nzlocal=a->nz+b->nz;
2496   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2497   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2498   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2499   MatScalar      *aworkA,*aworkB;
2500   PetscScalar    *vals;
2501   PetscMPIInt    tag1,tag2,tag3,imdex;
2502   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2503                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2504   MPI_Status     recv_status,*send_status;
2505   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2506   PetscInt       **rbuf_j=PETSC_NULL;
2507   PetscScalar    **rbuf_a=PETSC_NULL;
2508   Mat_Redundant  *redund=PETSC_NULL;
2509   PetscContainer container;
2510 
2511   PetscFunctionBegin;
2512   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2513   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2514 
2515   if (reuse == MAT_REUSE_MATRIX) {
2516     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2517     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2518     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2519     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2520     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2521     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2522     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2523     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2524 
2525     nsends    = redund->nsends;
2526     nrecvs    = redund->nrecvs;
2527     send_rank = redund->send_rank;
2528     recv_rank = redund->recv_rank;
2529     sbuf_nz   = redund->sbuf_nz;
2530     rbuf_nz   = redund->rbuf_nz;
2531     sbuf_j    = redund->sbuf_j;
2532     sbuf_a    = redund->sbuf_a;
2533     rbuf_j    = redund->rbuf_j;
2534     rbuf_a    = redund->rbuf_a;
2535   }
2536 
2537   if (reuse == MAT_INITIAL_MATRIX){
2538     PetscMPIInt  subrank,subsize;
2539     PetscInt     nleftover,np_subcomm;
2540     /* get the destination processors' id send_rank, nsends and nrecvs */
2541     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2542     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2543     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2544     np_subcomm = size/nsubcomm;
2545     nleftover  = size - nsubcomm*np_subcomm;
2546     nsends = 0; nrecvs = 0;
2547     for (i=0; i<size; i++){ /* i=rank*/
2548       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2549         send_rank[nsends] = i; nsends++;
2550         recv_rank[nrecvs++] = i;
2551       }
2552     }
2553     if (rank >= size - nleftover){/* this proc is a leftover processor */
2554       i = size-nleftover-1;
2555       j = 0;
2556       while (j < nsubcomm - nleftover){
2557         send_rank[nsends++] = i;
2558         i--; j++;
2559       }
2560     }
2561 
2562     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2563       for (i=0; i<nleftover; i++){
2564         recv_rank[nrecvs++] = size-nleftover+i;
2565       }
2566     }
2567 
2568     /* allocate sbuf_j, sbuf_a */
2569     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2570     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2571     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2572   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2573 
2574   /* copy mat's local entries into the buffers */
2575   if (reuse == MAT_INITIAL_MATRIX){
2576     rownz_max = 0;
2577     rptr = sbuf_j;
2578     cols = sbuf_j + rend-rstart + 1;
2579     vals = sbuf_a;
2580     rptr[0] = 0;
2581     for (i=0; i<rend-rstart; i++){
2582       row = i + rstart;
2583       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2584       ncols  = nzA + nzB;
2585       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2586       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2587       /* load the column indices for this row into cols */
2588       lwrite = 0;
2589       for (l=0; l<nzB; l++) {
2590         if ((ctmp = bmap[cworkB[l]]) < cstart){
2591           vals[lwrite]   = aworkB[l];
2592           cols[lwrite++] = ctmp;
2593         }
2594       }
2595       for (l=0; l<nzA; l++){
2596         vals[lwrite]   = aworkA[l];
2597         cols[lwrite++] = cstart + cworkA[l];
2598       }
2599       for (l=0; l<nzB; l++) {
2600         if ((ctmp = bmap[cworkB[l]]) >= cend){
2601           vals[lwrite]   = aworkB[l];
2602           cols[lwrite++] = ctmp;
2603         }
2604       }
2605       vals += ncols;
2606       cols += ncols;
2607       rptr[i+1] = rptr[i] + ncols;
2608       if (rownz_max < ncols) rownz_max = ncols;
2609     }
2610     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2611   } else { /* only copy matrix values into sbuf_a */
2612     rptr = sbuf_j;
2613     vals = sbuf_a;
2614     rptr[0] = 0;
2615     for (i=0; i<rend-rstart; i++){
2616       row = i + rstart;
2617       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2618       ncols  = nzA + nzB;
2619       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2620       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2621       lwrite = 0;
2622       for (l=0; l<nzB; l++) {
2623         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2624       }
2625       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2626       for (l=0; l<nzB; l++) {
2627         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2628       }
2629       vals += ncols;
2630       rptr[i+1] = rptr[i] + ncols;
2631     }
2632   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2633 
2634   /* send nzlocal to others, and recv other's nzlocal */
2635   /*--------------------------------------------------*/
2636   if (reuse == MAT_INITIAL_MATRIX){
2637     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2638     s_waits2 = s_waits3 + nsends;
2639     s_waits1 = s_waits2 + nsends;
2640     r_waits1 = s_waits1 + nsends;
2641     r_waits2 = r_waits1 + nrecvs;
2642     r_waits3 = r_waits2 + nrecvs;
2643   } else {
2644     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2645     r_waits3 = s_waits3 + nsends;
2646   }
2647 
2648   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2649   if (reuse == MAT_INITIAL_MATRIX){
2650     /* get new tags to keep the communication clean */
2651     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2652     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2653     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2654 
2655     /* post receives of other's nzlocal */
2656     for (i=0; i<nrecvs; i++){
2657       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2658     }
2659     /* send nzlocal to others */
2660     for (i=0; i<nsends; i++){
2661       sbuf_nz[i] = nzlocal;
2662       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2663     }
2664     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2665     count = nrecvs;
2666     while (count) {
2667       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2668       recv_rank[imdex] = recv_status.MPI_SOURCE;
2669       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2670       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2671 
2672       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2673       rbuf_nz[imdex] += i + 2;
2674       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2675       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2676       count--;
2677     }
2678     /* wait on sends of nzlocal */
2679     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2680     /* send mat->i,j to others, and recv from other's */
2681     /*------------------------------------------------*/
2682     for (i=0; i<nsends; i++){
2683       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2684       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2685     }
2686     /* wait on receives of mat->i,j */
2687     /*------------------------------*/
2688     count = nrecvs;
2689     while (count) {
2690       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2691       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2692       count--;
2693     }
2694     /* wait on sends of mat->i,j */
2695     /*---------------------------*/
2696     if (nsends) {
2697       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2698     }
2699   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2700 
2701   /* post receives, send and receive mat->a */
2702   /*----------------------------------------*/
2703   for (imdex=0; imdex<nrecvs; imdex++) {
2704     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2705   }
2706   for (i=0; i<nsends; i++){
2707     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2708   }
2709   count = nrecvs;
2710   while (count) {
2711     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2712     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2713     count--;
2714   }
2715   if (nsends) {
2716     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2717   }
2718 
2719   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2720 
2721   /* create redundant matrix */
2722   /*-------------------------*/
2723   if (reuse == MAT_INITIAL_MATRIX){
2724     /* compute rownz_max for preallocation */
2725     for (imdex=0; imdex<nrecvs; imdex++){
2726       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2727       rptr = rbuf_j[imdex];
2728       for (i=0; i<j; i++){
2729         ncols = rptr[i+1] - rptr[i];
2730         if (rownz_max < ncols) rownz_max = ncols;
2731       }
2732     }
2733 
2734     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2735     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2736     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2737     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2738     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2739     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2740   } else {
2741     C = *matredundant;
2742   }
2743 
2744   /* insert local matrix entries */
2745   rptr = sbuf_j;
2746   cols = sbuf_j + rend-rstart + 1;
2747   vals = sbuf_a;
2748   for (i=0; i<rend-rstart; i++){
2749     row   = i + rstart;
2750     ncols = rptr[i+1] - rptr[i];
2751     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2752     vals += ncols;
2753     cols += ncols;
2754   }
2755   /* insert received matrix entries */
2756   for (imdex=0; imdex<nrecvs; imdex++){
2757     rstart = rowrange[recv_rank[imdex]];
2758     rend   = rowrange[recv_rank[imdex]+1];
2759     rptr = rbuf_j[imdex];
2760     cols = rbuf_j[imdex] + rend-rstart + 1;
2761     vals = rbuf_a[imdex];
2762     for (i=0; i<rend-rstart; i++){
2763       row   = i + rstart;
2764       ncols = rptr[i+1] - rptr[i];
2765       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2766       vals += ncols;
2767       cols += ncols;
2768     }
2769   }
2770   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2771   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2772   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2773   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2774   if (reuse == MAT_INITIAL_MATRIX) {
2775     PetscContainer container;
2776     *matredundant = C;
2777     /* create a supporting struct and attach it to C for reuse */
2778     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2779     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2780     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2781     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2782     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2783     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2784 
2785     redund->nzlocal = nzlocal;
2786     redund->nsends  = nsends;
2787     redund->nrecvs  = nrecvs;
2788     redund->send_rank = send_rank;
2789     redund->recv_rank = recv_rank;
2790     redund->sbuf_nz = sbuf_nz;
2791     redund->rbuf_nz = rbuf_nz;
2792     redund->sbuf_j  = sbuf_j;
2793     redund->sbuf_a  = sbuf_a;
2794     redund->rbuf_j  = rbuf_j;
2795     redund->rbuf_a  = rbuf_a;
2796 
2797     redund->Destroy = C->ops->destroy;
2798     C->ops->destroy = MatDestroy_MatRedundant;
2799   }
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2805 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2806 {
2807   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2808   PetscErrorCode ierr;
2809   PetscInt       i,*idxb = 0;
2810   PetscScalar    *va,*vb;
2811   Vec            vtmp;
2812 
2813   PetscFunctionBegin;
2814   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2815   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2816   if (idx) {
2817     for (i=0; i<A->rmap->n; i++) {
2818       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2819     }
2820   }
2821 
2822   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2823   if (idx) {
2824     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2825   }
2826   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2827   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2828 
2829   for (i=0; i<A->rmap->n; i++){
2830     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2831       va[i] = vb[i];
2832       if (idx) idx[i] = a->garray[idxb[i]];
2833     }
2834   }
2835 
2836   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2837   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2838   ierr = PetscFree(idxb);CHKERRQ(ierr);
2839   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2845 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2846 {
2847   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2848   PetscErrorCode ierr;
2849   PetscInt       i,*idxb = 0;
2850   PetscScalar    *va,*vb;
2851   Vec            vtmp;
2852 
2853   PetscFunctionBegin;
2854   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2855   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2856   if (idx) {
2857     for (i=0; i<A->cmap->n; i++) {
2858       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2859     }
2860   }
2861 
2862   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2863   if (idx) {
2864     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2865   }
2866   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2867   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2868 
2869   for (i=0; i<A->rmap->n; i++){
2870     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2871       va[i] = vb[i];
2872       if (idx) idx[i] = a->garray[idxb[i]];
2873     }
2874   }
2875 
2876   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2877   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2878   ierr = PetscFree(idxb);CHKERRQ(ierr);
2879   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2885 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2886 {
2887   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2888   PetscInt       n      = A->rmap->n;
2889   PetscInt       cstart = A->cmap->rstart;
2890   PetscInt      *cmap   = mat->garray;
2891   PetscInt      *diagIdx, *offdiagIdx;
2892   Vec            diagV, offdiagV;
2893   PetscScalar   *a, *diagA, *offdiagA;
2894   PetscInt       r;
2895   PetscErrorCode ierr;
2896 
2897   PetscFunctionBegin;
2898   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2899   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2900   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2901   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2902   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2903   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2904   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2905   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2906   for(r = 0; r < n; ++r) {
2907     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2908       a[r]   = diagA[r];
2909       idx[r] = cstart + diagIdx[r];
2910     } else {
2911       a[r]   = offdiagA[r];
2912       idx[r] = cmap[offdiagIdx[r]];
2913     }
2914   }
2915   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2916   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2917   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2918   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2919   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2920   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 #undef __FUNCT__
2925 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2926 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2927 {
2928   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2929   PetscInt       n      = A->rmap->n;
2930   PetscInt       cstart = A->cmap->rstart;
2931   PetscInt      *cmap   = mat->garray;
2932   PetscInt      *diagIdx, *offdiagIdx;
2933   Vec            diagV, offdiagV;
2934   PetscScalar   *a, *diagA, *offdiagA;
2935   PetscInt       r;
2936   PetscErrorCode ierr;
2937 
2938   PetscFunctionBegin;
2939   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2940   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2941   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2942   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2943   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2944   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2945   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2946   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2947   for(r = 0; r < n; ++r) {
2948     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2949       a[r]   = diagA[r];
2950       idx[r] = cstart + diagIdx[r];
2951     } else {
2952       a[r]   = offdiagA[r];
2953       idx[r] = cmap[offdiagIdx[r]];
2954     }
2955   }
2956   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2957   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2958   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2959   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2960   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2961   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 #undef __FUNCT__
2966 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2967 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2968 {
2969   PetscErrorCode ierr;
2970   Mat            *dummy;
2971 
2972   PetscFunctionBegin;
2973   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2974   *newmat = *dummy;
2975   ierr = PetscFree(dummy);CHKERRQ(ierr);
2976   PetscFunctionReturn(0);
2977 }
2978 
2979 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2980 
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2983 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2984 {
2985   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2986   PetscErrorCode ierr;
2987 
2988   PetscFunctionBegin;
2989   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 
2994 /* -------------------------------------------------------------------*/
2995 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2996        MatGetRow_MPIAIJ,
2997        MatRestoreRow_MPIAIJ,
2998        MatMult_MPIAIJ,
2999 /* 4*/ MatMultAdd_MPIAIJ,
3000        MatMultTranspose_MPIAIJ,
3001        MatMultTransposeAdd_MPIAIJ,
3002 #ifdef PETSC_HAVE_PBGL
3003        MatSolve_MPIAIJ,
3004 #else
3005        0,
3006 #endif
3007        0,
3008        0,
3009 /*10*/ 0,
3010        0,
3011        0,
3012        MatSOR_MPIAIJ,
3013        MatTranspose_MPIAIJ,
3014 /*15*/ MatGetInfo_MPIAIJ,
3015        MatEqual_MPIAIJ,
3016        MatGetDiagonal_MPIAIJ,
3017        MatDiagonalScale_MPIAIJ,
3018        MatNorm_MPIAIJ,
3019 /*20*/ MatAssemblyBegin_MPIAIJ,
3020        MatAssemblyEnd_MPIAIJ,
3021        MatSetOption_MPIAIJ,
3022        MatZeroEntries_MPIAIJ,
3023 /*24*/ MatZeroRows_MPIAIJ,
3024        0,
3025 #ifdef PETSC_HAVE_PBGL
3026        0,
3027 #else
3028        0,
3029 #endif
3030        0,
3031        0,
3032 /*29*/ MatSetUp_MPIAIJ,
3033 #ifdef PETSC_HAVE_PBGL
3034        0,
3035 #else
3036        0,
3037 #endif
3038        0,
3039        0,
3040        0,
3041 /*34*/ MatDuplicate_MPIAIJ,
3042        0,
3043        0,
3044        0,
3045        0,
3046 /*39*/ MatAXPY_MPIAIJ,
3047        MatGetSubMatrices_MPIAIJ,
3048        MatIncreaseOverlap_MPIAIJ,
3049        MatGetValues_MPIAIJ,
3050        MatCopy_MPIAIJ,
3051 /*44*/ MatGetRowMax_MPIAIJ,
3052        MatScale_MPIAIJ,
3053        0,
3054        0,
3055        MatZeroRowsColumns_MPIAIJ,
3056 /*49*/ 0,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*54*/ MatFDColoringCreate_MPIAIJ,
3062        0,
3063        MatSetUnfactored_MPIAIJ,
3064        0, /* MatPermute_MPIAIJ, impl currently broken */
3065        0,
3066 /*59*/ MatGetSubMatrix_MPIAIJ,
3067        MatDestroy_MPIAIJ,
3068        MatView_MPIAIJ,
3069        0,
3070        0,
3071 /*64*/ 0,
3072        0,
3073        0,
3074        0,
3075        0,
3076 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3077        MatGetRowMinAbs_MPIAIJ,
3078        0,
3079        MatSetColoring_MPIAIJ,
3080 #if defined(PETSC_HAVE_ADIC)
3081        MatSetValuesAdic_MPIAIJ,
3082 #else
3083        0,
3084 #endif
3085        MatSetValuesAdifor_MPIAIJ,
3086 /*75*/ MatFDColoringApply_AIJ,
3087        0,
3088        0,
3089        0,
3090        0,
3091 /*80*/ 0,
3092        0,
3093        0,
3094 /*83*/ MatLoad_MPIAIJ,
3095        0,
3096        0,
3097        0,
3098        0,
3099        0,
3100 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3101        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3102        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3103        MatPtAP_Basic,
3104        MatPtAPSymbolic_MPIAIJ,
3105 /*94*/ MatPtAPNumeric_MPIAIJ,
3106        0,
3107        0,
3108        0,
3109        0,
3110 /*99*/ 0,
3111        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3112        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3113        MatConjugate_MPIAIJ,
3114        0,
3115 /*104*/MatSetValuesRow_MPIAIJ,
3116        MatRealPart_MPIAIJ,
3117        MatImaginaryPart_MPIAIJ,
3118        0,
3119        0,
3120 /*109*/0,
3121        MatGetRedundantMatrix_MPIAIJ,
3122        MatGetRowMin_MPIAIJ,
3123        0,
3124        0,
3125 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3126        0,
3127        0,
3128        0,
3129        0,
3130 /*119*/0,
3131        0,
3132        0,
3133        0,
3134        MatGetMultiProcBlock_MPIAIJ,
3135 /*124*/MatFindNonZeroRows_MPIAIJ,
3136        MatGetColumnNorms_MPIAIJ,
3137        MatInvertBlockDiagonal_MPIAIJ,
3138        0,
3139        MatGetSubMatricesParallel_MPIAIJ,
3140 /*129*/0,
3141        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3142        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3143        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3144        0,
3145 /*134*/0,
3146        0,
3147        0,
3148        0,
3149        0
3150 };
3151 
3152 /* ----------------------------------------------------------------------------------------*/
3153 
3154 EXTERN_C_BEGIN
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3157 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3158 {
3159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3160   PetscErrorCode ierr;
3161 
3162   PetscFunctionBegin;
3163   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3164   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3165   PetscFunctionReturn(0);
3166 }
3167 EXTERN_C_END
3168 
3169 EXTERN_C_BEGIN
3170 #undef __FUNCT__
3171 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3172 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3173 {
3174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3175   PetscErrorCode ierr;
3176 
3177   PetscFunctionBegin;
3178   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3179   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3180   PetscFunctionReturn(0);
3181 }
3182 EXTERN_C_END
3183 
3184 EXTERN_C_BEGIN
3185 #undef __FUNCT__
3186 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3187 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3188 {
3189   Mat_MPIAIJ     *b;
3190   PetscErrorCode ierr;
3191   PetscInt       i;
3192   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3193 
3194   PetscFunctionBegin;
3195   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3196   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3197   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3198   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3199   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3200   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3201 
3202   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3203   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3204   if (d_nnz) {
3205     for (i=0; i<B->rmap->n; i++) {
3206       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3207     }
3208   }
3209   if (o_nnz) {
3210     for (i=0; i<B->rmap->n; i++) {
3211       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3212     }
3213   }
3214   b = (Mat_MPIAIJ*)B->data;
3215 
3216   if (!B->preallocated) {
3217     /* Explicitly create 2 MATSEQAIJ matrices. */
3218     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3219     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3220     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3221     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3222     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3223     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3224     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3225     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3226     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3227     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3228   }
3229 
3230   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3231   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3232   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3233   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3234   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3235   B->preallocated = PETSC_TRUE;
3236   PetscFunctionReturn(0);
3237 }
3238 EXTERN_C_END
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3242 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3243 {
3244   Mat            mat;
3245   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3246   PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   *newmat       = 0;
3250   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3251   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3252   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3253   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3254   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3255   a    = (Mat_MPIAIJ*)mat->data;
3256 
3257   mat->factortype    = matin->factortype;
3258   mat->rmap->bs      = matin->rmap->bs;
3259   mat->cmap->bs      = matin->cmap->bs;
3260   mat->assembled    = PETSC_TRUE;
3261   mat->insertmode   = NOT_SET_VALUES;
3262   mat->preallocated = PETSC_TRUE;
3263 
3264   a->size           = oldmat->size;
3265   a->rank           = oldmat->rank;
3266   a->donotstash     = oldmat->donotstash;
3267   a->roworiented    = oldmat->roworiented;
3268   a->rowindices     = 0;
3269   a->rowvalues      = 0;
3270   a->getrowactive   = PETSC_FALSE;
3271 
3272   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3273   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3274 
3275   if (oldmat->colmap) {
3276 #if defined (PETSC_USE_CTABLE)
3277     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3278 #else
3279     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3280     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3281     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3282 #endif
3283   } else a->colmap = 0;
3284   if (oldmat->garray) {
3285     PetscInt len;
3286     len  = oldmat->B->cmap->n;
3287     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3288     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3289     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3290   } else a->garray = 0;
3291 
3292   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3293   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3294   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3295   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3296   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3297   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3298   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3299   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3300   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3301   *newmat = mat;
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 
3306 
3307 #undef __FUNCT__
3308 #define __FUNCT__ "MatLoad_MPIAIJ"
3309 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3310 {
3311   PetscScalar    *vals,*svals;
3312   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3313   PetscErrorCode ierr;
3314   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3315   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3316   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3317   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3318   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3319   int            fd;
3320   PetscInt       bs = 1;
3321 
3322   PetscFunctionBegin;
3323   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3324   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3325   if (!rank) {
3326     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3327     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3328     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3329   }
3330 
3331   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3332   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3333   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3334 
3335   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3336 
3337   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3338   M = header[1]; N = header[2];
3339   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3340   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3341   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3342 
3343   /* If global sizes are set, check if they are consistent with that given in the file */
3344   if (sizesset) {
3345     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3346   }
3347   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3348   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3349 
3350   /* determine ownership of all (block) rows */
3351   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3352   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3353   else m = newMat->rmap->n; /* Set by user */
3354 
3355   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3356   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3357 
3358   /* First process needs enough room for process with most rows */
3359   if (!rank) {
3360     mmax       = rowners[1];
3361     for (i=2; i<size; i++) {
3362       mmax = PetscMax(mmax,rowners[i]);
3363     }
3364   } else mmax = m;
3365 
3366   rowners[0] = 0;
3367   for (i=2; i<=size; i++) {
3368     rowners[i] += rowners[i-1];
3369   }
3370   rstart = rowners[rank];
3371   rend   = rowners[rank+1];
3372 
3373   /* distribute row lengths to all processors */
3374   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3375   if (!rank) {
3376     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3377     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3378     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3379     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3380     for (j=0; j<m; j++) {
3381       procsnz[0] += ourlens[j];
3382     }
3383     for (i=1; i<size; i++) {
3384       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3385       /* calculate the number of nonzeros on each processor */
3386       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3387         procsnz[i] += rowlengths[j];
3388       }
3389       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3390     }
3391     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3392   } else {
3393     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3394   }
3395 
3396   if (!rank) {
3397     /* determine max buffer needed and allocate it */
3398     maxnz = 0;
3399     for (i=0; i<size; i++) {
3400       maxnz = PetscMax(maxnz,procsnz[i]);
3401     }
3402     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3403 
3404     /* read in my part of the matrix column indices  */
3405     nz   = procsnz[0];
3406     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3407     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3408 
3409     /* read in every one elses and ship off */
3410     for (i=1; i<size; i++) {
3411       nz     = procsnz[i];
3412       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3413       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3414     }
3415     ierr = PetscFree(cols);CHKERRQ(ierr);
3416   } else {
3417     /* determine buffer space needed for message */
3418     nz = 0;
3419     for (i=0; i<m; i++) {
3420       nz += ourlens[i];
3421     }
3422     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3423 
3424     /* receive message of column indices*/
3425     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3426   }
3427 
3428   /* determine column ownership if matrix is not square */
3429   if (N != M) {
3430     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3431     else n = newMat->cmap->n;
3432     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3433     cstart = cend - n;
3434   } else {
3435     cstart = rstart;
3436     cend   = rend;
3437     n      = cend - cstart;
3438   }
3439 
3440   /* loop over local rows, determining number of off diagonal entries */
3441   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3442   jj = 0;
3443   for (i=0; i<m; i++) {
3444     for (j=0; j<ourlens[i]; j++) {
3445       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3446       jj++;
3447     }
3448   }
3449 
3450   for (i=0; i<m; i++) {
3451     ourlens[i] -= offlens[i];
3452   }
3453   if (!sizesset) {
3454     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3455   }
3456 
3457   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3458 
3459   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3460 
3461   for (i=0; i<m; i++) {
3462     ourlens[i] += offlens[i];
3463   }
3464 
3465   if (!rank) {
3466     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3467 
3468     /* read in my part of the matrix numerical values  */
3469     nz   = procsnz[0];
3470     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3471 
3472     /* insert into matrix */
3473     jj      = rstart;
3474     smycols = mycols;
3475     svals   = vals;
3476     for (i=0; i<m; i++) {
3477       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3478       smycols += ourlens[i];
3479       svals   += ourlens[i];
3480       jj++;
3481     }
3482 
3483     /* read in other processors and ship out */
3484     for (i=1; i<size; i++) {
3485       nz     = procsnz[i];
3486       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3487       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3488     }
3489     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3490   } else {
3491     /* receive numeric values */
3492     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3493 
3494     /* receive message of values*/
3495     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3496 
3497     /* insert into matrix */
3498     jj      = rstart;
3499     smycols = mycols;
3500     svals   = vals;
3501     for (i=0; i<m; i++) {
3502       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3503       smycols += ourlens[i];
3504       svals   += ourlens[i];
3505       jj++;
3506     }
3507   }
3508   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3509   ierr = PetscFree(vals);CHKERRQ(ierr);
3510   ierr = PetscFree(mycols);CHKERRQ(ierr);
3511   ierr = PetscFree(rowners);CHKERRQ(ierr);
3512 
3513   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3514   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3515 
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 #undef __FUNCT__
3520 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3521 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3522 {
3523   PetscErrorCode ierr;
3524   IS             iscol_local;
3525   PetscInt       csize;
3526 
3527   PetscFunctionBegin;
3528   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3529   if (call == MAT_REUSE_MATRIX) {
3530     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3531     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3532   } else {
3533     PetscInt cbs;
3534     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3535     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3536     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3537   }
3538   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3539   if (call == MAT_INITIAL_MATRIX) {
3540     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3541     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3542   }
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3547 #undef __FUNCT__
3548 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3549 /*
3550     Not great since it makes two copies of the submatrix, first an SeqAIJ
3551   in local and then by concatenating the local matrices the end result.
3552   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3553 
3554   Note: This requires a sequential iscol with all indices.
3555 */
3556 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3557 {
3558   PetscErrorCode ierr;
3559   PetscMPIInt    rank,size;
3560   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3561   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3562   PetscBool      allcolumns, colflag;
3563   Mat            M,Mreuse;
3564   MatScalar      *vwork,*aa;
3565   MPI_Comm       comm = ((PetscObject)mat)->comm;
3566   Mat_SeqAIJ     *aij;
3567 
3568 
3569   PetscFunctionBegin;
3570   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3571   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3572 
3573   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3574   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3575   if (colflag && ncol == mat->cmap->N){
3576     allcolumns = PETSC_TRUE;
3577   } else {
3578     allcolumns = PETSC_FALSE;
3579   }
3580   if (call ==  MAT_REUSE_MATRIX) {
3581     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3582     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3583     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3584   } else {
3585     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3586   }
3587 
3588   /*
3589       m - number of local rows
3590       n - number of columns (same on all processors)
3591       rstart - first row in new global matrix generated
3592   */
3593   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3594   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3595   if (call == MAT_INITIAL_MATRIX) {
3596     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3597     ii  = aij->i;
3598     jj  = aij->j;
3599 
3600     /*
3601         Determine the number of non-zeros in the diagonal and off-diagonal
3602         portions of the matrix in order to do correct preallocation
3603     */
3604 
3605     /* first get start and end of "diagonal" columns */
3606     if (csize == PETSC_DECIDE) {
3607       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3608       if (mglobal == n) { /* square matrix */
3609 	nlocal = m;
3610       } else {
3611         nlocal = n/size + ((n % size) > rank);
3612       }
3613     } else {
3614       nlocal = csize;
3615     }
3616     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3617     rstart = rend - nlocal;
3618     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3619 
3620     /* next, compute all the lengths */
3621     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3622     olens = dlens + m;
3623     for (i=0; i<m; i++) {
3624       jend = ii[i+1] - ii[i];
3625       olen = 0;
3626       dlen = 0;
3627       for (j=0; j<jend; j++) {
3628         if (*jj < rstart || *jj >= rend) olen++;
3629         else dlen++;
3630         jj++;
3631       }
3632       olens[i] = olen;
3633       dlens[i] = dlen;
3634     }
3635     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3636     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3637     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3638     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3639     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3640     ierr = PetscFree(dlens);CHKERRQ(ierr);
3641   } else {
3642     PetscInt ml,nl;
3643 
3644     M = *newmat;
3645     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3646     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3647     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3648     /*
3649          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3650        rather than the slower MatSetValues().
3651     */
3652     M->was_assembled = PETSC_TRUE;
3653     M->assembled     = PETSC_FALSE;
3654   }
3655   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3656   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3657   ii  = aij->i;
3658   jj  = aij->j;
3659   aa  = aij->a;
3660   for (i=0; i<m; i++) {
3661     row   = rstart + i;
3662     nz    = ii[i+1] - ii[i];
3663     cwork = jj;     jj += nz;
3664     vwork = aa;     aa += nz;
3665     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3666   }
3667 
3668   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3669   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3670   *newmat = M;
3671 
3672   /* save submatrix used in processor for next request */
3673   if (call ==  MAT_INITIAL_MATRIX) {
3674     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3675     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3676   }
3677 
3678   PetscFunctionReturn(0);
3679 }
3680 
3681 EXTERN_C_BEGIN
3682 #undef __FUNCT__
3683 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3684 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3685 {
3686   PetscInt       m,cstart, cend,j,nnz,i,d;
3687   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3688   const PetscInt *JJ;
3689   PetscScalar    *values;
3690   PetscErrorCode ierr;
3691 
3692   PetscFunctionBegin;
3693   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3694 
3695   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3696   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3697   m      = B->rmap->n;
3698   cstart = B->cmap->rstart;
3699   cend   = B->cmap->rend;
3700   rstart = B->rmap->rstart;
3701 
3702   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3703 
3704 #if defined(PETSC_USE_DEBUGGING)
3705   for (i=0; i<m; i++) {
3706     nnz     = Ii[i+1]- Ii[i];
3707     JJ      = J + Ii[i];
3708     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3709     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3710     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3711   }
3712 #endif
3713 
3714   for (i=0; i<m; i++) {
3715     nnz     = Ii[i+1]- Ii[i];
3716     JJ      = J + Ii[i];
3717     nnz_max = PetscMax(nnz_max,nnz);
3718     d       = 0;
3719     for (j=0; j<nnz; j++) {
3720       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3721     }
3722     d_nnz[i] = d;
3723     o_nnz[i] = nnz - d;
3724   }
3725   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3726   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3727 
3728   if (v) values = (PetscScalar*)v;
3729   else {
3730     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3731     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3732   }
3733 
3734   for (i=0; i<m; i++) {
3735     ii   = i + rstart;
3736     nnz  = Ii[i+1]- Ii[i];
3737     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3738   }
3739   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3740   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741 
3742   if (!v) {
3743     ierr = PetscFree(values);CHKERRQ(ierr);
3744   }
3745   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3746   PetscFunctionReturn(0);
3747 }
3748 EXTERN_C_END
3749 
3750 #undef __FUNCT__
3751 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3752 /*@
3753    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3754    (the default parallel PETSc format).
3755 
3756    Collective on MPI_Comm
3757 
3758    Input Parameters:
3759 +  B - the matrix
3760 .  i - the indices into j for the start of each local row (starts with zero)
3761 .  j - the column indices for each local row (starts with zero)
3762 -  v - optional values in the matrix
3763 
3764    Level: developer
3765 
3766    Notes:
3767        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3768      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3769      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3770 
3771        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3772 
3773        The format which is used for the sparse matrix input, is equivalent to a
3774     row-major ordering.. i.e for the following matrix, the input data expected is
3775     as shown:
3776 
3777         1 0 0
3778         2 0 3     P0
3779        -------
3780         4 5 6     P1
3781 
3782      Process0 [P0]: rows_owned=[0,1]
3783         i =  {0,1,3}  [size = nrow+1  = 2+1]
3784         j =  {0,0,2}  [size = nz = 6]
3785         v =  {1,2,3}  [size = nz = 6]
3786 
3787      Process1 [P1]: rows_owned=[2]
3788         i =  {0,3}    [size = nrow+1  = 1+1]
3789         j =  {0,1,2}  [size = nz = 6]
3790         v =  {4,5,6}  [size = nz = 6]
3791 
3792 .keywords: matrix, aij, compressed row, sparse, parallel
3793 
3794 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3795           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3796 @*/
3797 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3798 {
3799   PetscErrorCode ierr;
3800 
3801   PetscFunctionBegin;
3802   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3803   PetscFunctionReturn(0);
3804 }
3805 
3806 #undef __FUNCT__
3807 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3808 /*@C
3809    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3810    (the default parallel PETSc format).  For good matrix assembly performance
3811    the user should preallocate the matrix storage by setting the parameters
3812    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3813    performance can be increased by more than a factor of 50.
3814 
3815    Collective on MPI_Comm
3816 
3817    Input Parameters:
3818 +  A - the matrix
3819 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3820            (same value is used for all local rows)
3821 .  d_nnz - array containing the number of nonzeros in the various rows of the
3822            DIAGONAL portion of the local submatrix (possibly different for each row)
3823            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3824            The size of this array is equal to the number of local rows, i.e 'm'.
3825            For matrices that will be factored, you must leave room for (and set)
3826            the diagonal entry even if it is zero.
3827 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3828            submatrix (same value is used for all local rows).
3829 -  o_nnz - array containing the number of nonzeros in the various rows of the
3830            OFF-DIAGONAL portion of the local submatrix (possibly different for
3831            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3832            structure. The size of this array is equal to the number
3833            of local rows, i.e 'm'.
3834 
3835    If the *_nnz parameter is given then the *_nz parameter is ignored
3836 
3837    The AIJ format (also called the Yale sparse matrix format or
3838    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3839    storage.  The stored row and column indices begin with zero.
3840    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3841 
3842    The parallel matrix is partitioned such that the first m0 rows belong to
3843    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3844    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3845 
3846    The DIAGONAL portion of the local submatrix of a processor can be defined
3847    as the submatrix which is obtained by extraction the part corresponding to
3848    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3849    first row that belongs to the processor, r2 is the last row belonging to
3850    the this processor, and c1-c2 is range of indices of the local part of a
3851    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3852    common case of a square matrix, the row and column ranges are the same and
3853    the DIAGONAL part is also square. The remaining portion of the local
3854    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3855 
3856    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3857 
3858    You can call MatGetInfo() to get information on how effective the preallocation was;
3859    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3860    You can also run with the option -info and look for messages with the string
3861    malloc in them to see if additional memory allocation was needed.
3862 
3863    Example usage:
3864 
3865    Consider the following 8x8 matrix with 34 non-zero values, that is
3866    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3867    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3868    as follows:
3869 
3870 .vb
3871             1  2  0  |  0  3  0  |  0  4
3872     Proc0   0  5  6  |  7  0  0  |  8  0
3873             9  0 10  | 11  0  0  | 12  0
3874     -------------------------------------
3875            13  0 14  | 15 16 17  |  0  0
3876     Proc1   0 18  0  | 19 20 21  |  0  0
3877             0  0  0  | 22 23  0  | 24  0
3878     -------------------------------------
3879     Proc2  25 26 27  |  0  0 28  | 29  0
3880            30  0  0  | 31 32 33  |  0 34
3881 .ve
3882 
3883    This can be represented as a collection of submatrices as:
3884 
3885 .vb
3886       A B C
3887       D E F
3888       G H I
3889 .ve
3890 
3891    Where the submatrices A,B,C are owned by proc0, D,E,F are
3892    owned by proc1, G,H,I are owned by proc2.
3893 
3894    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3895    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3896    The 'M','N' parameters are 8,8, and have the same values on all procs.
3897 
3898    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3899    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3900    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3901    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3902    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3903    matrix, ans [DF] as another SeqAIJ matrix.
3904 
3905    When d_nz, o_nz parameters are specified, d_nz storage elements are
3906    allocated for every row of the local diagonal submatrix, and o_nz
3907    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3908    One way to choose d_nz and o_nz is to use the max nonzerors per local
3909    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3910    In this case, the values of d_nz,o_nz are:
3911 .vb
3912      proc0 : dnz = 2, o_nz = 2
3913      proc1 : dnz = 3, o_nz = 2
3914      proc2 : dnz = 1, o_nz = 4
3915 .ve
3916    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3917    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3918    for proc3. i.e we are using 12+15+10=37 storage locations to store
3919    34 values.
3920 
3921    When d_nnz, o_nnz parameters are specified, the storage is specified
3922    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3923    In the above case the values for d_nnz,o_nnz are:
3924 .vb
3925      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3926      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3927      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3928 .ve
3929    Here the space allocated is sum of all the above values i.e 34, and
3930    hence pre-allocation is perfect.
3931 
3932    Level: intermediate
3933 
3934 .keywords: matrix, aij, compressed row, sparse, parallel
3935 
3936 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3937           MPIAIJ, MatGetInfo()
3938 @*/
3939 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3940 {
3941   PetscErrorCode ierr;
3942 
3943   PetscFunctionBegin;
3944   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3945   PetscValidType(B,1);
3946   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3947   PetscFunctionReturn(0);
3948 }
3949 
3950 #undef __FUNCT__
3951 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3952 /*@
3953      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3954          CSR format the local rows.
3955 
3956    Collective on MPI_Comm
3957 
3958    Input Parameters:
3959 +  comm - MPI communicator
3960 .  m - number of local rows (Cannot be PETSC_DECIDE)
3961 .  n - This value should be the same as the local size used in creating the
3962        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3963        calculated if N is given) For square matrices n is almost always m.
3964 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3965 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3966 .   i - row indices
3967 .   j - column indices
3968 -   a - matrix values
3969 
3970    Output Parameter:
3971 .   mat - the matrix
3972 
3973    Level: intermediate
3974 
3975    Notes:
3976        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3977      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3978      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3979 
3980        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3981 
3982        The format which is used for the sparse matrix input, is equivalent to a
3983     row-major ordering.. i.e for the following matrix, the input data expected is
3984     as shown:
3985 
3986         1 0 0
3987         2 0 3     P0
3988        -------
3989         4 5 6     P1
3990 
3991      Process0 [P0]: rows_owned=[0,1]
3992         i =  {0,1,3}  [size = nrow+1  = 2+1]
3993         j =  {0,0,2}  [size = nz = 6]
3994         v =  {1,2,3}  [size = nz = 6]
3995 
3996      Process1 [P1]: rows_owned=[2]
3997         i =  {0,3}    [size = nrow+1  = 1+1]
3998         j =  {0,1,2}  [size = nz = 6]
3999         v =  {4,5,6}  [size = nz = 6]
4000 
4001 .keywords: matrix, aij, compressed row, sparse, parallel
4002 
4003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4004           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4005 @*/
4006 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4007 {
4008   PetscErrorCode ierr;
4009 
4010   PetscFunctionBegin;
4011   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4012   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4013   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4014   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4015   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4016   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4017   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4018   PetscFunctionReturn(0);
4019 }
4020 
4021 #undef __FUNCT__
4022 #define __FUNCT__ "MatCreateAIJ"
4023 /*@C
4024    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4025    (the default parallel PETSc format).  For good matrix assembly performance
4026    the user should preallocate the matrix storage by setting the parameters
4027    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4028    performance can be increased by more than a factor of 50.
4029 
4030    Collective on MPI_Comm
4031 
4032    Input Parameters:
4033 +  comm - MPI communicator
4034 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4035            This value should be the same as the local size used in creating the
4036            y vector for the matrix-vector product y = Ax.
4037 .  n - This value should be the same as the local size used in creating the
4038        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4039        calculated if N is given) For square matrices n is almost always m.
4040 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4041 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4042 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4043            (same value is used for all local rows)
4044 .  d_nnz - array containing the number of nonzeros in the various rows of the
4045            DIAGONAL portion of the local submatrix (possibly different for each row)
4046            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4047            The size of this array is equal to the number of local rows, i.e 'm'.
4048 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4049            submatrix (same value is used for all local rows).
4050 -  o_nnz - array containing the number of nonzeros in the various rows of the
4051            OFF-DIAGONAL portion of the local submatrix (possibly different for
4052            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4053            structure. The size of this array is equal to the number
4054            of local rows, i.e 'm'.
4055 
4056    Output Parameter:
4057 .  A - the matrix
4058 
4059    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4060    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4061    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4062 
4063    Notes:
4064    If the *_nnz parameter is given then the *_nz parameter is ignored
4065 
4066    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4067    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4068    storage requirements for this matrix.
4069 
4070    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4071    processor than it must be used on all processors that share the object for
4072    that argument.
4073 
4074    The user MUST specify either the local or global matrix dimensions
4075    (possibly both).
4076 
4077    The parallel matrix is partitioned across processors such that the
4078    first m0 rows belong to process 0, the next m1 rows belong to
4079    process 1, the next m2 rows belong to process 2 etc.. where
4080    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4081    values corresponding to [m x N] submatrix.
4082 
4083    The columns are logically partitioned with the n0 columns belonging
4084    to 0th partition, the next n1 columns belonging to the next
4085    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4086 
4087    The DIAGONAL portion of the local submatrix on any given processor
4088    is the submatrix corresponding to the rows and columns m,n
4089    corresponding to the given processor. i.e diagonal matrix on
4090    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4091    etc. The remaining portion of the local submatrix [m x (N-n)]
4092    constitute the OFF-DIAGONAL portion. The example below better
4093    illustrates this concept.
4094 
4095    For a square global matrix we define each processor's diagonal portion
4096    to be its local rows and the corresponding columns (a square submatrix);
4097    each processor's off-diagonal portion encompasses the remainder of the
4098    local matrix (a rectangular submatrix).
4099 
4100    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4101 
4102    When calling this routine with a single process communicator, a matrix of
4103    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4104    type of communicator, use the construction mechanism:
4105      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4106 
4107    By default, this format uses inodes (identical nodes) when possible.
4108    We search for consecutive rows with the same nonzero structure, thereby
4109    reusing matrix information to achieve increased efficiency.
4110 
4111    Options Database Keys:
4112 +  -mat_no_inode  - Do not use inodes
4113 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4114 -  -mat_aij_oneindex - Internally use indexing starting at 1
4115         rather than 0.  Note that when calling MatSetValues(),
4116         the user still MUST index entries starting at 0!
4117 
4118 
4119    Example usage:
4120 
4121    Consider the following 8x8 matrix with 34 non-zero values, that is
4122    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4123    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4124    as follows:
4125 
4126 .vb
4127             1  2  0  |  0  3  0  |  0  4
4128     Proc0   0  5  6  |  7  0  0  |  8  0
4129             9  0 10  | 11  0  0  | 12  0
4130     -------------------------------------
4131            13  0 14  | 15 16 17  |  0  0
4132     Proc1   0 18  0  | 19 20 21  |  0  0
4133             0  0  0  | 22 23  0  | 24  0
4134     -------------------------------------
4135     Proc2  25 26 27  |  0  0 28  | 29  0
4136            30  0  0  | 31 32 33  |  0 34
4137 .ve
4138 
4139    This can be represented as a collection of submatrices as:
4140 
4141 .vb
4142       A B C
4143       D E F
4144       G H I
4145 .ve
4146 
4147    Where the submatrices A,B,C are owned by proc0, D,E,F are
4148    owned by proc1, G,H,I are owned by proc2.
4149 
4150    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4151    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4152    The 'M','N' parameters are 8,8, and have the same values on all procs.
4153 
4154    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4155    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4156    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4157    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4158    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4159    matrix, ans [DF] as another SeqAIJ matrix.
4160 
4161    When d_nz, o_nz parameters are specified, d_nz storage elements are
4162    allocated for every row of the local diagonal submatrix, and o_nz
4163    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4164    One way to choose d_nz and o_nz is to use the max nonzerors per local
4165    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4166    In this case, the values of d_nz,o_nz are:
4167 .vb
4168      proc0 : dnz = 2, o_nz = 2
4169      proc1 : dnz = 3, o_nz = 2
4170      proc2 : dnz = 1, o_nz = 4
4171 .ve
4172    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4173    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4174    for proc3. i.e we are using 12+15+10=37 storage locations to store
4175    34 values.
4176 
4177    When d_nnz, o_nnz parameters are specified, the storage is specified
4178    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4179    In the above case the values for d_nnz,o_nnz are:
4180 .vb
4181      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4182      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4183      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4184 .ve
4185    Here the space allocated is sum of all the above values i.e 34, and
4186    hence pre-allocation is perfect.
4187 
4188    Level: intermediate
4189 
4190 .keywords: matrix, aij, compressed row, sparse, parallel
4191 
4192 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4193           MPIAIJ, MatCreateMPIAIJWithArrays()
4194 @*/
4195 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4196 {
4197   PetscErrorCode ierr;
4198   PetscMPIInt    size;
4199 
4200   PetscFunctionBegin;
4201   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4202   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4203   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4204   if (size > 1) {
4205     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4206     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4207   } else {
4208     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4209     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4210   }
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 #undef __FUNCT__
4215 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4216 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4217 {
4218   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4219 
4220   PetscFunctionBegin;
4221   *Ad     = a->A;
4222   *Ao     = a->B;
4223   *colmap = a->garray;
4224   PetscFunctionReturn(0);
4225 }
4226 
4227 #undef __FUNCT__
4228 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4229 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4230 {
4231   PetscErrorCode ierr;
4232   PetscInt       i;
4233   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4234 
4235   PetscFunctionBegin;
4236   if (coloring->ctype == IS_COLORING_GLOBAL) {
4237     ISColoringValue *allcolors,*colors;
4238     ISColoring      ocoloring;
4239 
4240     /* set coloring for diagonal portion */
4241     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4242 
4243     /* set coloring for off-diagonal portion */
4244     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4245     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4246     for (i=0; i<a->B->cmap->n; i++) {
4247       colors[i] = allcolors[a->garray[i]];
4248     }
4249     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4250     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4251     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4252     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4253   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4254     ISColoringValue *colors;
4255     PetscInt        *larray;
4256     ISColoring      ocoloring;
4257 
4258     /* set coloring for diagonal portion */
4259     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4260     for (i=0; i<a->A->cmap->n; i++) {
4261       larray[i] = i + A->cmap->rstart;
4262     }
4263     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4264     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4265     for (i=0; i<a->A->cmap->n; i++) {
4266       colors[i] = coloring->colors[larray[i]];
4267     }
4268     ierr = PetscFree(larray);CHKERRQ(ierr);
4269     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4270     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4271     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4272 
4273     /* set coloring for off-diagonal portion */
4274     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4275     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4276     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4277     for (i=0; i<a->B->cmap->n; i++) {
4278       colors[i] = coloring->colors[larray[i]];
4279     }
4280     ierr = PetscFree(larray);CHKERRQ(ierr);
4281     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4282     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4283     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4284   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4285 
4286   PetscFunctionReturn(0);
4287 }
4288 
4289 #if defined(PETSC_HAVE_ADIC)
4290 #undef __FUNCT__
4291 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4292 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4293 {
4294   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4295   PetscErrorCode ierr;
4296 
4297   PetscFunctionBegin;
4298   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4299   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4300   PetscFunctionReturn(0);
4301 }
4302 #endif
4303 
4304 #undef __FUNCT__
4305 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4306 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4307 {
4308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4309   PetscErrorCode ierr;
4310 
4311   PetscFunctionBegin;
4312   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4313   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 #undef __FUNCT__
4318 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4319 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4320 {
4321   PetscErrorCode ierr;
4322   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4323   PetscInt       *indx;
4324 
4325   PetscFunctionBegin;
4326   /* This routine will ONLY return MPIAIJ type matrix */
4327   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4328   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4329   if (n == PETSC_DECIDE){
4330     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4331   }
4332   /* Check sum(n) = N */
4333   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4334   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4335 
4336   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4337   rstart -= m;
4338 
4339   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4340   for (i=0;i<m;i++) {
4341     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4342     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4343     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4344   }
4345 
4346   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4347   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4348   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4349   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4350   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4351   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4352   PetscFunctionReturn(0);
4353 }
4354 
4355 #undef __FUNCT__
4356 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4357 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4358 {
4359   PetscErrorCode ierr;
4360   PetscInt       m,N,i,rstart,nnz,Ii;
4361   PetscInt       *indx;
4362   PetscScalar    *values;
4363 
4364   PetscFunctionBegin;
4365   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4366   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4367   for (i=0;i<m;i++) {
4368     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4369     Ii    = i + rstart;
4370     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4371     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4372   }
4373   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4374   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4375   PetscFunctionReturn(0);
4376 }
4377 
4378 #undef __FUNCT__
4379 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4380 /*@
4381       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4382                  matrices from each processor
4383 
4384     Collective on MPI_Comm
4385 
4386    Input Parameters:
4387 +    comm - the communicators the parallel matrix will live on
4388 .    inmat - the input sequential matrices
4389 .    n - number of local columns (or PETSC_DECIDE)
4390 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4391 
4392    Output Parameter:
4393 .    outmat - the parallel matrix generated
4394 
4395     Level: advanced
4396 
4397    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4398 
4399 @*/
4400 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4401 {
4402   PetscErrorCode ierr;
4403 
4404   PetscFunctionBegin;
4405   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4406   if (scall == MAT_INITIAL_MATRIX){
4407     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4408   }
4409   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4410   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4411   PetscFunctionReturn(0);
4412 }
4413 
4414 #undef __FUNCT__
4415 #define __FUNCT__ "MatFileSplit"
4416 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4417 {
4418   PetscErrorCode    ierr;
4419   PetscMPIInt       rank;
4420   PetscInt          m,N,i,rstart,nnz;
4421   size_t            len;
4422   const PetscInt    *indx;
4423   PetscViewer       out;
4424   char              *name;
4425   Mat               B;
4426   const PetscScalar *values;
4427 
4428   PetscFunctionBegin;
4429   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4430   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4431   /* Should this be the type of the diagonal block of A? */
4432   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4433   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4434   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4435   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4436   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4437   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4438   for (i=0;i<m;i++) {
4439     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4440     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4441     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4442   }
4443   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4444   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4445 
4446   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4447   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4448   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4449   sprintf(name,"%s.%d",outfile,rank);
4450   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4451   ierr = PetscFree(name);
4452   ierr = MatView(B,out);CHKERRQ(ierr);
4453   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4454   ierr = MatDestroy(&B);CHKERRQ(ierr);
4455   PetscFunctionReturn(0);
4456 }
4457 
4458 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4459 #undef __FUNCT__
4460 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4461 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4462 {
4463   PetscErrorCode       ierr;
4464   Mat_Merge_SeqsToMPI  *merge;
4465   PetscContainer       container;
4466 
4467   PetscFunctionBegin;
4468   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4469   if (container) {
4470     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4483     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4484     ierr = PetscFree(merge);CHKERRQ(ierr);
4485     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4486   }
4487   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 #include <../src/mat/utils/freespace.h>
4492 #include <petscbt.h>
4493 
4494 #undef __FUNCT__
4495 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4496 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4497 {
4498   PetscErrorCode       ierr;
4499   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4500   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4501   PetscMPIInt          size,rank,taga,*len_s;
4502   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4503   PetscInt             proc,m;
4504   PetscInt             **buf_ri,**buf_rj;
4505   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4506   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4507   MPI_Request          *s_waits,*r_waits;
4508   MPI_Status           *status;
4509   MatScalar            *aa=a->a;
4510   MatScalar            **abuf_r,*ba_i;
4511   Mat_Merge_SeqsToMPI  *merge;
4512   PetscContainer       container;
4513 
4514   PetscFunctionBegin;
4515   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4516 
4517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4518   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4519 
4520   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4521   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4522 
4523   bi     = merge->bi;
4524   bj     = merge->bj;
4525   buf_ri = merge->buf_ri;
4526   buf_rj = merge->buf_rj;
4527 
4528   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4529   owners = merge->rowmap->range;
4530   len_s  = merge->len_s;
4531 
4532   /* send and recv matrix values */
4533   /*-----------------------------*/
4534   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4535   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4536 
4537   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4538   for (proc=0,k=0; proc<size; proc++){
4539     if (!len_s[proc]) continue;
4540     i = owners[proc];
4541     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4542     k++;
4543   }
4544 
4545   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4546   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4547   ierr = PetscFree(status);CHKERRQ(ierr);
4548 
4549   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4550   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4551 
4552   /* insert mat values of mpimat */
4553   /*----------------------------*/
4554   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4555   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4556 
4557   for (k=0; k<merge->nrecv; k++){
4558     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4559     nrows = *(buf_ri_k[k]);
4560     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4561     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4562   }
4563 
4564   /* set values of ba */
4565   m = merge->rowmap->n;
4566   for (i=0; i<m; i++) {
4567     arow = owners[rank] + i;
4568     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4569     bnzi = bi[i+1] - bi[i];
4570     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4571 
4572     /* add local non-zero vals of this proc's seqmat into ba */
4573     anzi = ai[arow+1] - ai[arow];
4574     aj   = a->j + ai[arow];
4575     aa   = a->a + ai[arow];
4576     nextaj = 0;
4577     for (j=0; nextaj<anzi; j++){
4578       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4579         ba_i[j] += aa[nextaj++];
4580       }
4581     }
4582 
4583     /* add received vals into ba */
4584     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4585       /* i-th row */
4586       if (i == *nextrow[k]) {
4587         anzi = *(nextai[k]+1) - *nextai[k];
4588         aj   = buf_rj[k] + *(nextai[k]);
4589         aa   = abuf_r[k] + *(nextai[k]);
4590         nextaj = 0;
4591         for (j=0; nextaj<anzi; j++){
4592           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4593             ba_i[j] += aa[nextaj++];
4594           }
4595         }
4596         nextrow[k]++; nextai[k]++;
4597       }
4598     }
4599     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4600   }
4601   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4603 
4604   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4605   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4606   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4607   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4608   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4609   PetscFunctionReturn(0);
4610 }
4611 
4612 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4613 
4614 #undef __FUNCT__
4615 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4616 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4617 {
4618   PetscErrorCode       ierr;
4619   Mat                  B_mpi;
4620   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4621   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4622   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4623   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4624   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4625   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4626   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4627   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4628   MPI_Status           *status;
4629   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4630   PetscBT              lnkbt;
4631   Mat_Merge_SeqsToMPI  *merge;
4632   PetscContainer       container;
4633 
4634   PetscFunctionBegin;
4635   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4636 
4637   /* make sure it is a PETSc comm */
4638   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4639   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4640   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4641 
4642   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4643   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4644 
4645   /* determine row ownership */
4646   /*---------------------------------------------------------*/
4647   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4648   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4649   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4650   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4651   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4652   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4653   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4654 
4655   m      = merge->rowmap->n;
4656   M      = merge->rowmap->N;
4657   owners = merge->rowmap->range;
4658 
4659   /* determine the number of messages to send, their lengths */
4660   /*---------------------------------------------------------*/
4661   len_s  = merge->len_s;
4662 
4663   len = 0;  /* length of buf_si[] */
4664   merge->nsend = 0;
4665   for (proc=0; proc<size; proc++){
4666     len_si[proc] = 0;
4667     if (proc == rank){
4668       len_s[proc] = 0;
4669     } else {
4670       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4671       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4672     }
4673     if (len_s[proc]) {
4674       merge->nsend++;
4675       nrows = 0;
4676       for (i=owners[proc]; i<owners[proc+1]; i++){
4677         if (ai[i+1] > ai[i]) nrows++;
4678       }
4679       len_si[proc] = 2*(nrows+1);
4680       len += len_si[proc];
4681     }
4682   }
4683 
4684   /* determine the number and length of messages to receive for ij-structure */
4685   /*-------------------------------------------------------------------------*/
4686   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4687   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4688 
4689   /* post the Irecv of j-structure */
4690   /*-------------------------------*/
4691   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4692   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4693 
4694   /* post the Isend of j-structure */
4695   /*--------------------------------*/
4696   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4697 
4698   for (proc=0, k=0; proc<size; proc++){
4699     if (!len_s[proc]) continue;
4700     i = owners[proc];
4701     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4702     k++;
4703   }
4704 
4705   /* receives and sends of j-structure are complete */
4706   /*------------------------------------------------*/
4707   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4708   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4709 
4710   /* send and recv i-structure */
4711   /*---------------------------*/
4712   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4713   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4714 
4715   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4716   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4717   for (proc=0,k=0; proc<size; proc++){
4718     if (!len_s[proc]) continue;
4719     /* form outgoing message for i-structure:
4720          buf_si[0]:                 nrows to be sent
4721                [1:nrows]:           row index (global)
4722                [nrows+1:2*nrows+1]: i-structure index
4723     */
4724     /*-------------------------------------------*/
4725     nrows = len_si[proc]/2 - 1;
4726     buf_si_i    = buf_si + nrows+1;
4727     buf_si[0]   = nrows;
4728     buf_si_i[0] = 0;
4729     nrows = 0;
4730     for (i=owners[proc]; i<owners[proc+1]; i++){
4731       anzi = ai[i+1] - ai[i];
4732       if (anzi) {
4733         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4734         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4735         nrows++;
4736       }
4737     }
4738     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4739     k++;
4740     buf_si += len_si[proc];
4741   }
4742 
4743   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4744   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4745 
4746   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4747   for (i=0; i<merge->nrecv; i++){
4748     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4749   }
4750 
4751   ierr = PetscFree(len_si);CHKERRQ(ierr);
4752   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4753   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4754   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4755   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4756   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4757   ierr = PetscFree(status);CHKERRQ(ierr);
4758 
4759   /* compute a local seq matrix in each processor */
4760   /*----------------------------------------------*/
4761   /* allocate bi array and free space for accumulating nonzero column info */
4762   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4763   bi[0] = 0;
4764 
4765   /* create and initialize a linked list */
4766   nlnk = N+1;
4767   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4768 
4769   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4770   len = 0;
4771   len  = ai[owners[rank+1]] - ai[owners[rank]];
4772   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4773   current_space = free_space;
4774 
4775   /* determine symbolic info for each local row */
4776   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4777 
4778   for (k=0; k<merge->nrecv; k++){
4779     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4780     nrows = *buf_ri_k[k];
4781     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4782     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4783   }
4784 
4785   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4786   len = 0;
4787   for (i=0;i<m;i++) {
4788     bnzi   = 0;
4789     /* add local non-zero cols of this proc's seqmat into lnk */
4790     arow   = owners[rank] + i;
4791     anzi   = ai[arow+1] - ai[arow];
4792     aj     = a->j + ai[arow];
4793     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4794     bnzi += nlnk;
4795     /* add received col data into lnk */
4796     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4797       if (i == *nextrow[k]) { /* i-th row */
4798         anzi = *(nextai[k]+1) - *nextai[k];
4799         aj   = buf_rj[k] + *nextai[k];
4800         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4801         bnzi += nlnk;
4802         nextrow[k]++; nextai[k]++;
4803       }
4804     }
4805     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4806 
4807     /* if free space is not available, make more free space */
4808     if (current_space->local_remaining<bnzi) {
4809       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4810       nspacedouble++;
4811     }
4812     /* copy data into free space, then initialize lnk */
4813     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4814     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4815 
4816     current_space->array           += bnzi;
4817     current_space->local_used      += bnzi;
4818     current_space->local_remaining -= bnzi;
4819 
4820     bi[i+1] = bi[i] + bnzi;
4821   }
4822 
4823   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4824 
4825   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4826   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4827   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4828 
4829   /* create symbolic parallel matrix B_mpi */
4830   /*---------------------------------------*/
4831     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4832   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4833   if (n==PETSC_DECIDE) {
4834     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4835   } else {
4836     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4837   }
4838   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4839   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4840   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4841   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4842   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4843 
4844   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4845   B_mpi->assembled     = PETSC_FALSE;
4846   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4847   merge->bi            = bi;
4848   merge->bj            = bj;
4849   merge->buf_ri        = buf_ri;
4850   merge->buf_rj        = buf_rj;
4851   merge->coi           = PETSC_NULL;
4852   merge->coj           = PETSC_NULL;
4853   merge->owners_co     = PETSC_NULL;
4854 
4855   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4856 
4857   /* attach the supporting struct to B_mpi for reuse */
4858   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4859   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4860   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4861   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4862   *mpimat = B_mpi;
4863 
4864   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4865   PetscFunctionReturn(0);
4866 }
4867 
4868 #undef __FUNCT__
4869 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4870 /*@C
4871       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4872                  matrices from each processor
4873 
4874     Collective on MPI_Comm
4875 
4876    Input Parameters:
4877 +    comm - the communicators the parallel matrix will live on
4878 .    seqmat - the input sequential matrices
4879 .    m - number of local rows (or PETSC_DECIDE)
4880 .    n - number of local columns (or PETSC_DECIDE)
4881 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4882 
4883    Output Parameter:
4884 .    mpimat - the parallel matrix generated
4885 
4886     Level: advanced
4887 
4888    Notes:
4889      The dimensions of the sequential matrix in each processor MUST be the same.
4890      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4891      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4892 @*/
4893 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4894 {
4895   PetscErrorCode   ierr;
4896   PetscMPIInt     size;
4897 
4898   PetscFunctionBegin;
4899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4900   if (size == 1){
4901      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4902      if (scall == MAT_INITIAL_MATRIX){
4903        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4904      } else {
4905        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4906      }
4907      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4908      PetscFunctionReturn(0);
4909   }
4910   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4911   if (scall == MAT_INITIAL_MATRIX){
4912     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4913   }
4914   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4915   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 #undef __FUNCT__
4920 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4921 /*@
4922      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4923           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4924           with MatGetSize()
4925 
4926     Not Collective
4927 
4928    Input Parameters:
4929 +    A - the matrix
4930 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4931 
4932    Output Parameter:
4933 .    A_loc - the local sequential matrix generated
4934 
4935     Level: developer
4936 
4937 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4938 
4939 @*/
4940 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4941 {
4942   PetscErrorCode  ierr;
4943   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4944   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4945   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4946   MatScalar       *aa=a->a,*ba=b->a,*cam;
4947   PetscScalar     *ca;
4948   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4949   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4950   PetscBool       match;
4951 
4952   PetscFunctionBegin;
4953   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4954   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4955   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4956   if (scall == MAT_INITIAL_MATRIX){
4957     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4958     ci[0] = 0;
4959     for (i=0; i<am; i++){
4960       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4961     }
4962     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4963     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4964     k = 0;
4965     for (i=0; i<am; i++) {
4966       ncols_o = bi[i+1] - bi[i];
4967       ncols_d = ai[i+1] - ai[i];
4968       /* off-diagonal portion of A */
4969       for (jo=0; jo<ncols_o; jo++) {
4970         col = cmap[*bj];
4971         if (col >= cstart) break;
4972         cj[k]   = col; bj++;
4973         ca[k++] = *ba++;
4974       }
4975       /* diagonal portion of A */
4976       for (j=0; j<ncols_d; j++) {
4977         cj[k]   = cstart + *aj++;
4978         ca[k++] = *aa++;
4979       }
4980       /* off-diagonal portion of A */
4981       for (j=jo; j<ncols_o; j++) {
4982         cj[k]   = cmap[*bj++];
4983         ca[k++] = *ba++;
4984       }
4985     }
4986     /* put together the new matrix */
4987     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4988     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4989     /* Since these are PETSc arrays, change flags to free them as necessary. */
4990     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4991     mat->free_a  = PETSC_TRUE;
4992     mat->free_ij = PETSC_TRUE;
4993     mat->nonew   = 0;
4994   } else if (scall == MAT_REUSE_MATRIX){
4995     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4996     ci = mat->i; cj = mat->j; cam = mat->a;
4997     for (i=0; i<am; i++) {
4998       /* off-diagonal portion of A */
4999       ncols_o = bi[i+1] - bi[i];
5000       for (jo=0; jo<ncols_o; jo++) {
5001         col = cmap[*bj];
5002         if (col >= cstart) break;
5003         *cam++ = *ba++; bj++;
5004       }
5005       /* diagonal portion of A */
5006       ncols_d = ai[i+1] - ai[i];
5007       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5008       /* off-diagonal portion of A */
5009       for (j=jo; j<ncols_o; j++) {
5010         *cam++ = *ba++; bj++;
5011       }
5012     }
5013   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5014   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5015   PetscFunctionReturn(0);
5016 }
5017 
5018 #undef __FUNCT__
5019 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5020 /*@C
5021      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5022 
5023     Not Collective
5024 
5025    Input Parameters:
5026 +    A - the matrix
5027 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5028 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5029 
5030    Output Parameter:
5031 .    A_loc - the local sequential matrix generated
5032 
5033     Level: developer
5034 
5035 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5036 
5037 @*/
5038 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5039 {
5040   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5041   PetscErrorCode    ierr;
5042   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5043   IS                isrowa,iscola;
5044   Mat               *aloc;
5045   PetscBool       match;
5046 
5047   PetscFunctionBegin;
5048   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5049   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5050   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5051   if (!row){
5052     start = A->rmap->rstart; end = A->rmap->rend;
5053     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5054   } else {
5055     isrowa = *row;
5056   }
5057   if (!col){
5058     start = A->cmap->rstart;
5059     cmap  = a->garray;
5060     nzA   = a->A->cmap->n;
5061     nzB   = a->B->cmap->n;
5062     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5063     ncols = 0;
5064     for (i=0; i<nzB; i++) {
5065       if (cmap[i] < start) idx[ncols++] = cmap[i];
5066       else break;
5067     }
5068     imark = i;
5069     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5070     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5071     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5072   } else {
5073     iscola = *col;
5074   }
5075   if (scall != MAT_INITIAL_MATRIX){
5076     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5077     aloc[0] = *A_loc;
5078   }
5079   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5080   *A_loc = aloc[0];
5081   ierr = PetscFree(aloc);CHKERRQ(ierr);
5082   if (!row){
5083     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5084   }
5085   if (!col){
5086     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5087   }
5088   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5089   PetscFunctionReturn(0);
5090 }
5091 
5092 #undef __FUNCT__
5093 #define __FUNCT__ "MatGetBrowsOfAcols"
5094 /*@C
5095     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5096 
5097     Collective on Mat
5098 
5099    Input Parameters:
5100 +    A,B - the matrices in mpiaij format
5101 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5102 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5103 
5104    Output Parameter:
5105 +    rowb, colb - index sets of rows and columns of B to extract
5106 -    B_seq - the sequential matrix generated
5107 
5108     Level: developer
5109 
5110 @*/
5111 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5112 {
5113   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5114   PetscErrorCode    ierr;
5115   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5116   IS                isrowb,iscolb;
5117   Mat               *bseq=PETSC_NULL;
5118 
5119   PetscFunctionBegin;
5120   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5121     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5122   }
5123   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5124 
5125   if (scall == MAT_INITIAL_MATRIX){
5126     start = A->cmap->rstart;
5127     cmap  = a->garray;
5128     nzA   = a->A->cmap->n;
5129     nzB   = a->B->cmap->n;
5130     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5131     ncols = 0;
5132     for (i=0; i<nzB; i++) {  /* row < local row index */
5133       if (cmap[i] < start) idx[ncols++] = cmap[i];
5134       else break;
5135     }
5136     imark = i;
5137     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5138     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5139     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5140     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5141   } else {
5142     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5143     isrowb = *rowb; iscolb = *colb;
5144     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5145     bseq[0] = *B_seq;
5146   }
5147   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5148   *B_seq = bseq[0];
5149   ierr = PetscFree(bseq);CHKERRQ(ierr);
5150   if (!rowb){
5151     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5152   } else {
5153     *rowb = isrowb;
5154   }
5155   if (!colb){
5156     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5157   } else {
5158     *colb = iscolb;
5159   }
5160   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5161   PetscFunctionReturn(0);
5162 }
5163 
5164 #undef __FUNCT__
5165 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5166 /*
5167     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5168     of the OFF-DIAGONAL portion of local A
5169 
5170     Collective on Mat
5171 
5172    Input Parameters:
5173 +    A,B - the matrices in mpiaij format
5174 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5175 
5176    Output Parameter:
5177 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5178 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5179 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5180 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5181 
5182     Level: developer
5183 
5184 */
5185 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5186 {
5187   VecScatter_MPI_General *gen_to,*gen_from;
5188   PetscErrorCode         ierr;
5189   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5190   Mat_SeqAIJ             *b_oth;
5191   VecScatter             ctx=a->Mvctx;
5192   MPI_Comm               comm=((PetscObject)ctx)->comm;
5193   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5194   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5195   PetscScalar            *rvalues,*svalues;
5196   MatScalar              *b_otha,*bufa,*bufA;
5197   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5198   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5199   MPI_Status             *sstatus,rstatus;
5200   PetscMPIInt            jj;
5201   PetscInt               *cols,sbs,rbs;
5202   PetscScalar            *vals;
5203 
5204   PetscFunctionBegin;
5205   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5206     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5207   }
5208   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5209   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5210 
5211   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5212   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5213   rvalues  = gen_from->values; /* holds the length of receiving row */
5214   svalues  = gen_to->values;   /* holds the length of sending row */
5215   nrecvs   = gen_from->n;
5216   nsends   = gen_to->n;
5217 
5218   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5219   srow     = gen_to->indices;   /* local row index to be sent */
5220   sstarts  = gen_to->starts;
5221   sprocs   = gen_to->procs;
5222   sstatus  = gen_to->sstatus;
5223   sbs      = gen_to->bs;
5224   rstarts  = gen_from->starts;
5225   rprocs   = gen_from->procs;
5226   rbs      = gen_from->bs;
5227 
5228   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5229   if (scall == MAT_INITIAL_MATRIX){
5230     /* i-array */
5231     /*---------*/
5232     /*  post receives */
5233     for (i=0; i<nrecvs; i++){
5234       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5235       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5236       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5237     }
5238 
5239     /* pack the outgoing message */
5240     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5241     sstartsj[0] = 0;  rstartsj[0] = 0;
5242     len = 0; /* total length of j or a array to be sent */
5243     k = 0;
5244     for (i=0; i<nsends; i++){
5245       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5246       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5247       for (j=0; j<nrows; j++) {
5248         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5249         for (l=0; l<sbs; l++){
5250           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5251           rowlen[j*sbs+l] = ncols;
5252           len += ncols;
5253           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5254         }
5255         k++;
5256       }
5257       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5258       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5259     }
5260     /* recvs and sends of i-array are completed */
5261     i = nrecvs;
5262     while (i--) {
5263       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5264     }
5265     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5266 
5267     /* allocate buffers for sending j and a arrays */
5268     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5269     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5270 
5271     /* create i-array of B_oth */
5272     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5273     b_othi[0] = 0;
5274     len = 0; /* total length of j or a array to be received */
5275     k = 0;
5276     for (i=0; i<nrecvs; i++){
5277       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5278       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5279       for (j=0; j<nrows; j++) {
5280         b_othi[k+1] = b_othi[k] + rowlen[j];
5281         len += rowlen[j]; k++;
5282       }
5283       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5284     }
5285 
5286     /* allocate space for j and a arrrays of B_oth */
5287     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5288     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5289 
5290     /* j-array */
5291     /*---------*/
5292     /*  post receives of j-array */
5293     for (i=0; i<nrecvs; i++){
5294       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5295       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5296     }
5297 
5298     /* pack the outgoing message j-array */
5299     k = 0;
5300     for (i=0; i<nsends; i++){
5301       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5302       bufJ = bufj+sstartsj[i];
5303       for (j=0; j<nrows; j++) {
5304         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5305         for (ll=0; ll<sbs; ll++){
5306           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5307           for (l=0; l<ncols; l++){
5308             *bufJ++ = cols[l];
5309           }
5310           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5311         }
5312       }
5313       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5314     }
5315 
5316     /* recvs and sends of j-array are completed */
5317     i = nrecvs;
5318     while (i--) {
5319       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5320     }
5321     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5322   } else if (scall == MAT_REUSE_MATRIX){
5323     sstartsj = *startsj_s;
5324     rstartsj = *startsj_r;
5325     bufa     = *bufa_ptr;
5326     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5327     b_otha   = b_oth->a;
5328   } else {
5329     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5330   }
5331 
5332   /* a-array */
5333   /*---------*/
5334   /*  post receives of a-array */
5335   for (i=0; i<nrecvs; i++){
5336     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5337     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5338   }
5339 
5340   /* pack the outgoing message a-array */
5341   k = 0;
5342   for (i=0; i<nsends; i++){
5343     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5344     bufA = bufa+sstartsj[i];
5345     for (j=0; j<nrows; j++) {
5346       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5347       for (ll=0; ll<sbs; ll++){
5348         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5349         for (l=0; l<ncols; l++){
5350           *bufA++ = vals[l];
5351         }
5352         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5353       }
5354     }
5355     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5356   }
5357   /* recvs and sends of a-array are completed */
5358   i = nrecvs;
5359   while (i--) {
5360     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5361   }
5362   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5363   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5364 
5365   if (scall == MAT_INITIAL_MATRIX){
5366     /* put together the new matrix */
5367     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5368 
5369     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5370     /* Since these are PETSc arrays, change flags to free them as necessary. */
5371     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5372     b_oth->free_a  = PETSC_TRUE;
5373     b_oth->free_ij = PETSC_TRUE;
5374     b_oth->nonew   = 0;
5375 
5376     ierr = PetscFree(bufj);CHKERRQ(ierr);
5377     if (!startsj_s || !bufa_ptr){
5378       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5379       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5380     } else {
5381       *startsj_s = sstartsj;
5382       *startsj_r = rstartsj;
5383       *bufa_ptr  = bufa;
5384     }
5385   }
5386   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5387   PetscFunctionReturn(0);
5388 }
5389 
5390 #undef __FUNCT__
5391 #define __FUNCT__ "MatGetCommunicationStructs"
5392 /*@C
5393   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5394 
5395   Not Collective
5396 
5397   Input Parameters:
5398 . A - The matrix in mpiaij format
5399 
5400   Output Parameter:
5401 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5402 . colmap - A map from global column index to local index into lvec
5403 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5404 
5405   Level: developer
5406 
5407 @*/
5408 #if defined (PETSC_USE_CTABLE)
5409 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5410 #else
5411 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5412 #endif
5413 {
5414   Mat_MPIAIJ *a;
5415 
5416   PetscFunctionBegin;
5417   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5418   PetscValidPointer(lvec, 2);
5419   PetscValidPointer(colmap, 3);
5420   PetscValidPointer(multScatter, 4);
5421   a = (Mat_MPIAIJ *) A->data;
5422   if (lvec) *lvec = a->lvec;
5423   if (colmap) *colmap = a->colmap;
5424   if (multScatter) *multScatter = a->Mvctx;
5425   PetscFunctionReturn(0);
5426 }
5427 
5428 EXTERN_C_BEGIN
5429 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5430 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5431 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5432 EXTERN_C_END
5433 
5434 #undef __FUNCT__
5435 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5436 /*
5437     Computes (B'*A')' since computing B*A directly is untenable
5438 
5439                n                       p                          p
5440         (              )       (              )         (                  )
5441       m (      A       )  *  n (       B      )   =   m (         C        )
5442         (              )       (              )         (                  )
5443 
5444 */
5445 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5446 {
5447   PetscErrorCode     ierr;
5448   Mat                At,Bt,Ct;
5449 
5450   PetscFunctionBegin;
5451   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5452   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5453   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5454   ierr = MatDestroy(&At);CHKERRQ(ierr);
5455   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5456   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5457   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5458   PetscFunctionReturn(0);
5459 }
5460 
5461 #undef __FUNCT__
5462 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5463 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5464 {
5465   PetscErrorCode ierr;
5466   PetscInt       m=A->rmap->n,n=B->cmap->n;
5467   Mat            Cmat;
5468 
5469   PetscFunctionBegin;
5470   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5471   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5472   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5473   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5474   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5475   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5476   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5477   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5478   *C   = Cmat;
5479   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5480   PetscFunctionReturn(0);
5481 }
5482 
5483 /* ----------------------------------------------------------------*/
5484 #undef __FUNCT__
5485 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5486 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5487 {
5488   PetscErrorCode ierr;
5489 
5490   PetscFunctionBegin;
5491   if (scall == MAT_INITIAL_MATRIX){
5492     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5493   }
5494   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5495   PetscFunctionReturn(0);
5496 }
5497 
5498 EXTERN_C_BEGIN
5499 #if defined(PETSC_HAVE_MUMPS)
5500 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5501 #endif
5502 #if defined(PETSC_HAVE_PASTIX)
5503 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5504 #endif
5505 #if defined(PETSC_HAVE_SUPERLU_DIST)
5506 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5507 #endif
5508 #if defined(PETSC_HAVE_SPOOLES)
5509 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5510 #endif
5511 EXTERN_C_END
5512 
5513 /*MC
5514    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5515 
5516    Options Database Keys:
5517 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5518 
5519   Level: beginner
5520 
5521 .seealso: MatCreateAIJ()
5522 M*/
5523 
5524 EXTERN_C_BEGIN
5525 #undef __FUNCT__
5526 #define __FUNCT__ "MatCreate_MPIAIJ"
5527 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5528 {
5529   Mat_MPIAIJ     *b;
5530   PetscErrorCode ierr;
5531   PetscMPIInt    size;
5532 
5533   PetscFunctionBegin;
5534   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5535 
5536   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5537   B->data         = (void*)b;
5538   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5539   B->assembled    = PETSC_FALSE;
5540 
5541   B->insertmode   = NOT_SET_VALUES;
5542   b->size         = size;
5543   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5544 
5545   /* build cache for off array entries formed */
5546   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5547   b->donotstash  = PETSC_FALSE;
5548   b->colmap      = 0;
5549   b->garray      = 0;
5550   b->roworiented = PETSC_TRUE;
5551 
5552   /* stuff used for matrix vector multiply */
5553   b->lvec      = PETSC_NULL;
5554   b->Mvctx     = PETSC_NULL;
5555 
5556   /* stuff for MatGetRow() */
5557   b->rowindices   = 0;
5558   b->rowvalues    = 0;
5559   b->getrowactive = PETSC_FALSE;
5560 
5561 #if defined(PETSC_HAVE_SPOOLES)
5562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5563                                      "MatGetFactor_mpiaij_spooles",
5564                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5565 #endif
5566 #if defined(PETSC_HAVE_MUMPS)
5567   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5568                                      "MatGetFactor_aij_mumps",
5569                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5570 #endif
5571 #if defined(PETSC_HAVE_PASTIX)
5572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5573 					   "MatGetFactor_mpiaij_pastix",
5574 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5575 #endif
5576 #if defined(PETSC_HAVE_SUPERLU_DIST)
5577   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5578                                      "MatGetFactor_mpiaij_superlu_dist",
5579                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5580 #endif
5581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5582                                      "MatStoreValues_MPIAIJ",
5583                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5585                                      "MatRetrieveValues_MPIAIJ",
5586                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5588 				     "MatGetDiagonalBlock_MPIAIJ",
5589                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5591 				     "MatIsTranspose_MPIAIJ",
5592 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5594 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5595 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5597 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5598 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5600 				     "MatDiagonalScaleLocal_MPIAIJ",
5601 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5603                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5604                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5606                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5607                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5609                                      "MatConvert_MPIAIJ_MPISBAIJ",
5610                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5612                                      "MatMatMult_MPIDense_MPIAIJ",
5613                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5615                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5616                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5618                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5619                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5621   PetscFunctionReturn(0);
5622 }
5623 EXTERN_C_END
5624 
5625 #undef __FUNCT__
5626 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5627 /*@
5628      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5629          and "off-diagonal" part of the matrix in CSR format.
5630 
5631    Collective on MPI_Comm
5632 
5633    Input Parameters:
5634 +  comm - MPI communicator
5635 .  m - number of local rows (Cannot be PETSC_DECIDE)
5636 .  n - This value should be the same as the local size used in creating the
5637        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5638        calculated if N is given) For square matrices n is almost always m.
5639 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5640 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5641 .   i - row indices for "diagonal" portion of matrix
5642 .   j - column indices
5643 .   a - matrix values
5644 .   oi - row indices for "off-diagonal" portion of matrix
5645 .   oj - column indices
5646 -   oa - matrix values
5647 
5648    Output Parameter:
5649 .   mat - the matrix
5650 
5651    Level: advanced
5652 
5653    Notes:
5654        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5655        must free the arrays once the matrix has been destroyed and not before.
5656 
5657        The i and j indices are 0 based
5658 
5659        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5660 
5661        This sets local rows and cannot be used to set off-processor values.
5662 
5663        You cannot later use MatSetValues() to change values in this matrix.
5664 
5665 .keywords: matrix, aij, compressed row, sparse, parallel
5666 
5667 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5668           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5669 @*/
5670 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5671 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5672 {
5673   PetscErrorCode ierr;
5674   Mat_MPIAIJ     *maij;
5675 
5676  PetscFunctionBegin;
5677   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5678   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5679   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5680   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5681   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5682   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5683   maij = (Mat_MPIAIJ*) (*mat)->data;
5684   maij->donotstash     = PETSC_TRUE;
5685   (*mat)->preallocated = PETSC_TRUE;
5686 
5687   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5688   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5689 
5690   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5691   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5692 
5693   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697 
5698   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   PetscFunctionReturn(0);
5701 }
5702 
5703 /*
5704     Special version for direct calls from Fortran
5705 */
5706 #include <petsc-private/fortranimpl.h>
5707 
5708 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5709 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5710 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5711 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5712 #endif
5713 
5714 /* Change these macros so can be used in void function */
5715 #undef CHKERRQ
5716 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5717 #undef SETERRQ2
5718 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5719 #undef SETERRQ3
5720 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5721 #undef SETERRQ
5722 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5723 
5724 EXTERN_C_BEGIN
5725 #undef __FUNCT__
5726 #define __FUNCT__ "matsetvaluesmpiaij_"
5727 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5728 {
5729   Mat             mat = *mmat;
5730   PetscInt        m = *mm, n = *mn;
5731   InsertMode      addv = *maddv;
5732   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5733   PetscScalar     value;
5734   PetscErrorCode  ierr;
5735 
5736   MatCheckPreallocated(mat,1);
5737   if (mat->insertmode == NOT_SET_VALUES) {
5738     mat->insertmode = addv;
5739   }
5740 #if defined(PETSC_USE_DEBUG)
5741   else if (mat->insertmode != addv) {
5742     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5743   }
5744 #endif
5745   {
5746   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5747   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5748   PetscBool       roworiented = aij->roworiented;
5749 
5750   /* Some Variables required in the macro */
5751   Mat             A = aij->A;
5752   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5753   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5754   MatScalar       *aa = a->a;
5755   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5756   Mat             B = aij->B;
5757   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5758   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5759   MatScalar       *ba = b->a;
5760 
5761   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5762   PetscInt        nonew = a->nonew;
5763   MatScalar       *ap1,*ap2;
5764 
5765   PetscFunctionBegin;
5766   for (i=0; i<m; i++) {
5767     if (im[i] < 0) continue;
5768 #if defined(PETSC_USE_DEBUG)
5769     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5770 #endif
5771     if (im[i] >= rstart && im[i] < rend) {
5772       row      = im[i] - rstart;
5773       lastcol1 = -1;
5774       rp1      = aj + ai[row];
5775       ap1      = aa + ai[row];
5776       rmax1    = aimax[row];
5777       nrow1    = ailen[row];
5778       low1     = 0;
5779       high1    = nrow1;
5780       lastcol2 = -1;
5781       rp2      = bj + bi[row];
5782       ap2      = ba + bi[row];
5783       rmax2    = bimax[row];
5784       nrow2    = bilen[row];
5785       low2     = 0;
5786       high2    = nrow2;
5787 
5788       for (j=0; j<n; j++) {
5789         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5790         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5791         if (in[j] >= cstart && in[j] < cend){
5792           col = in[j] - cstart;
5793           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5794         } else if (in[j] < 0) continue;
5795 #if defined(PETSC_USE_DEBUG)
5796         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5797 #endif
5798         else {
5799           if (mat->was_assembled) {
5800             if (!aij->colmap) {
5801               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5802             }
5803 #if defined (PETSC_USE_CTABLE)
5804             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5805 	    col--;
5806 #else
5807             col = aij->colmap[in[j]] - 1;
5808 #endif
5809             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5810               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5811               col =  in[j];
5812               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5813               B = aij->B;
5814               b = (Mat_SeqAIJ*)B->data;
5815               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5816               rp2      = bj + bi[row];
5817               ap2      = ba + bi[row];
5818               rmax2    = bimax[row];
5819               nrow2    = bilen[row];
5820               low2     = 0;
5821               high2    = nrow2;
5822               bm       = aij->B->rmap->n;
5823               ba = b->a;
5824             }
5825           } else col = in[j];
5826           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5827         }
5828       }
5829     } else {
5830       if (!aij->donotstash) {
5831         if (roworiented) {
5832           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5833         } else {
5834           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5835         }
5836       }
5837     }
5838   }}
5839   PetscFunctionReturnVoid();
5840 }
5841 EXTERN_C_END
5842 
5843