xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ca45077f9abf8e7344aac749d49eef6cfd88f621)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically
18   switches over to use inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
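
/*
   A minimal usage sketch of the recommendation above (M, N, d_nz, and o_nz are
   placeholder values, not part of this file): calling both preallocation routines
   makes the same code correct for one or many processes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/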
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
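/*
   MatFindNonZeroRows_MPIAIJ - builds an index set of the locally owned rows that
   contain at least one stored nonzero value, in either the diagonal block A or the
   off-diagonal block B; if no process has an empty row, *keptrows is left as 0.
*/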
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na; j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
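/*
   MatGetColumnNorms_MPIAIJ - computes a norm of each global column: local
   contributions from the diagonal (A) and off-diagonal (B) blocks are accumulated
   into a work array of global length n, combined across processes with
   MPI_Allreduce() (max for NORM_INFINITY, sum otherwise), and square-rooted at
   the end for NORM_2.
*/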
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
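/*
   A call sketch (hypothetical variable names; gmat is a sequential AIJ matrix whose
   numerical content is taken from process 0, m is the local row count desired on
   this process):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... later, after changing the values (but not the structure) of gmat ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/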
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the number of diagonal and off-diagonal entries in each row */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211         for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214           jj++;
215         }
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the number of diagonal and off-diagonal entries in each row */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245         for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248           jj++;
249         }
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
330 at a slightly higher hash-table lookup cost; without it, it is not scalable
331 (each processor holds an integer array of order N), but access is fast.
332 */
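/*
   Lookup sketch (mirrors the pattern used in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below): translating a global column gcol into a local
   column of the off-diagonal block; col comes back as -1 when gcol is not present.

#if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
#else
     col = aij->colmap[gcol] - 1;
#endif
*/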
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
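/*
   The two macros below insert one value into the diagonal (A) or off-diagonal (B)
   block of the parallel matrix: a search window cached in lastcol1/lastcol2 plus a
   short binary search locates the column within the row; existing entries are added
   to or overwritten in place; a new entry shifts the later entries up and, when the
   row is full, grows the storage through MatSeqXAIJReallocateAIJ().
*/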
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) { \
401       if (rp2[_i] > col) break; \
402       if (rp2[_i] == col) { \
403         if (addv == ADD_VALUES) ap2[_i] += value; \
404         else                    ap2[_i] = value; \
405         goto b_noinsert; \
406       } \
407     } \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++; \
413     /* shift up all the later entries in this row */ \
414     for (ii=N; ii>=_i; ii--) { \
415       rp2[ii+1] = rp2[ii]; \
416       ap2[ii+1] = ap2[ii]; \
417     } \
418     rp2[_i] = col; \
419     ap2[_i] = value; \
420     b_noinsert: ; \
421     bilen[row] = nrow2; \
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
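
/*
   Layout assumed for the input array v of MatSetValuesRow_MPIAIJ() above: the
   complete local row in ascending global column order,

     v = [ off-diagonal entries left of the diagonal block | diagonal-block entries | remaining off-diagonal entries ]

   where the first and last segments are copied into B and the middle segment into A.
*/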
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col--;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselves, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
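/*
   MatZeroRows_MPIAIJ - each process determines the owner of every global row index
   it was handed and ships the off-process indices with nonblocking sends/receives;
   each process then zeroes the received local rows in both blocks, optionally
   inserts diag on the diagonal, and, when x and b are provided, sets b = diag*x in
   the zeroed rows.
*/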
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_USE_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_USE_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_USE_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840      Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal, we use separate
842      code so that MatSetValues() is not called for each diagonal entry;
843      that would allocate new memory for each value, causing lots of mallocs
844      and slowing things down.
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_USE_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_USE_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_USE_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually
1141        added into yy until the VecScatterEnd() on the next line */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* the values were actually received in the Begin() but we still need to call this no-op End() */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: the diagonal blocks must be transposes of each other */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
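/*
   MatView_MPIAIJ_Binary - writes the parallel matrix in the standard PETSc binary
   format: a header (MAT_FILE_CLASSID, global rows, global columns, total nonzeros)
   followed by the row lengths, column indices, and numerical values. Process 0 does
   all of the file I/O; the remaining processes stream their pieces to it with
   MPIULong_Send() under PetscViewerFlowControl pacing.
*/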
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   if (!rank) {
1308     header[0] = MAT_FILE_CLASSID;
1309     header[1] = mat->rmap->N;
1310     header[2] = mat->cmap->N;
1311     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1312     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1313     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1314     /* get largest number of rows any processor has */
1315     rlen = mat->rmap->n;
1316     range = mat->rmap->range;
1317     for (i=1; i<size; i++) {
1318       rlen = PetscMax(rlen,range[i+1] - range[i]);
1319     }
1320   } else {
1321     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1322     rlen = mat->rmap->n;
1323   }
1324 
1325   /* load up the local row counts */
1326   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1327   for (i=0; i<mat->rmap->n; i++) {
1328     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1329   }
1330 
1331   /* store the row lengths to the file */
1332   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1333   if (!rank) {
1334     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     for (i=1; i<size; i++) {
1336       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1337       rlen = range[i+1] - range[i];
1338       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1340     }
1341     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1342   } else {
1343     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1344     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1346   }
1347   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1348 
1349   /* load up the local column indices */
1350   nzmax = nz; /* 0th processor needs as much space as the largest amount needed by any processor */
1351   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1352   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1353   cnt  = 0;
1354   for (i=0; i<mat->rmap->n; i++) {
1355     for (j=B->i[i]; j<B->i[i+1]; j++) {
1356       if ( (col = garray[B->j[j]]) > cstart) break;
1357       column_indices[cnt++] = col;
1358     }
1359     for (k=A->i[i]; k<A->i[i+1]; k++) {
1360       column_indices[cnt++] = A->j[k] + cstart;
1361     }
1362     for (; j<B->i[i+1]; j++) {
1363       column_indices[cnt++] = garray[B->j[j]];
1364     }
1365   }
1366   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1367 
1368   /* store the column indices to the file */
1369   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1370   if (!rank) {
1371     MPI_Status status;
1372     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1373     for (i=1; i<size; i++) {
1374       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1375       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1376       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1377       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1383     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1386   }
1387   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1388 
1389   /* load up the local column values */
1390   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1391   cnt  = 0;
1392   for (i=0; i<mat->rmap->n; i++) {
1393     for (j=B->i[i]; j<B->i[i+1]; j++) {
1394       if ( garray[B->j[j]] > cstart) break;
1395       column_values[cnt++] = B->a[j];
1396     }
1397     for (k=A->i[i]; k<A->i[i+1]; k++) {
1398       column_values[cnt++] = A->a[k];
1399     }
1400     for (; j<B->i[i+1]; j++) {
1401       column_values[cnt++] = B->a[j];
1402     }
1403   }
1404   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1405 
1406   /* store the column values to the file */
1407   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1408   if (!rank) {
1409     MPI_Status status;
1410     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1411     for (i=1; i<size; i++) {
1412       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1413       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1414       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1415       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1416       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1417     }
1418     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1419   } else {
1420     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1421     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1424   }
1425   ierr = PetscFree(column_values);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1432 {
1433   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1434   PetscErrorCode    ierr;
1435   PetscMPIInt       rank = aij->rank,size = aij->size;
1436   PetscBool         isdraw,iascii,isbinary;
1437   PetscViewer       sviewer;
1438   PetscViewerFormat format;
1439 
1440   PetscFunctionBegin;
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1443   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1444   if (iascii) {
1445     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1446     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1447       MatInfo    info;
1448       PetscInt   *inodes;
1449 
1450       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1451       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1452       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1454       if (!inodes) {
1455         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1456                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1457       } else {
1458         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1459                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1460       }
1461       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1463       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1465       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1468       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1471       PetscInt   inodecount,inodelimit,*inodes;
1472       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1473       if (inodes) {
1474         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1475       } else {
1476         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1477       }
1478       PetscFunctionReturn(0);
1479     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1480       PetscFunctionReturn(0);
1481     }
1482   } else if (isbinary) {
1483     if (size == 1) {
1484       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1485       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1486     } else {
1487       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1488     }
1489     PetscFunctionReturn(0);
1490   } else if (isdraw) {
1491     PetscDraw  draw;
1492     PetscBool  isnull;
1493     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1494     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1495   }
1496 
1497   if (size == 1) {
1498     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1499     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1500   } else {
1501     /* assemble the entire matrix onto first processor. */
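    /* The remaining viewer types need the whole matrix on one process: a temporary
       parallel matrix A is built whose rows all live on process 0, and the local
       diagonal (A) and off-diagonal (B) blocks are copied into it using global column
       indices.  Since this costs O(global size) memory on process 0, the guard below
       refuses output beyond 1024 rows unless -mat_ascii_output_large is given. */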
1502     Mat         A;
1503     Mat_SeqAIJ  *Aloc;
1504     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505     MatScalar   *a;
1506 
1507     if (mat->rmap->N > 1024) {
1508       PetscBool  flg = PETSC_FALSE;
1509 
1510       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1511       if (!flg) {
1512         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513       }
1514     }
1515 
1516     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1517     if (!rank) {
1518       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1519     } else {
1520       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1521     }
1522     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1524     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1525     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1526     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1527 
1528     /* copy over the A part */
1529     Aloc = (Mat_SeqAIJ*)aij->A->data;
1530     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531     row = mat->rmap->rstart;
1532     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1533     for (i=0; i<m; i++) {
1534       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1535       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536     }
1537     aj = Aloc->j;
1538     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1539 
1540     /* copy over the B part */
1541     Aloc = (Mat_SeqAIJ*)aij->B->data;
1542     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543     row  = mat->rmap->rstart;
1544     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1545     ct   = cols;
1546     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547     for (i=0; i<m; i++) {
1548       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1549       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550     }
1551     ierr = PetscFree(ct);CHKERRQ(ierr);
1552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554     /*
1555        Everyone has to call to draw the matrix since the graphics waits are
1556        synchronized across all processors that share the PetscDraw object
1557     */
1558     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559     if (!rank) {
1560       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1561       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1563       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1564     }
1565     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     ierr = MatDestroy(&A);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatView_MPIAIJ"
1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574 {
1575   PetscErrorCode ierr;
1576   PetscBool      iascii,isdraw,issocket,isbinary;
1577 
1578   PetscFunctionBegin;
1579   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1580   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1581   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1582   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1583   if (iascii || isdraw || isbinary || issocket) {
1584     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1585   } else {
1586     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
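/*
   Parallel SOR is implemented as a block-Jacobi outer iteration with SOR applied only
   to the local diagonal block A; coupling through the off-diagonal block B enters via
   the right-hand side.  Each outer iteration, with x_g the scattered ghost values of x,
   computes

       bb1 = bb - B*x_g    and then relaxes    A*x = bb1    with a local SOR sweep.

   The SOR_EISENSTAT branch implements a variant of Eisenstat's trick: a backward local
   sweep produces x, the right-hand side is corrected using the diagonal D via
   bb1 = ((omega-2)/omega)*D*x + bb + B*x_g, and a forward local sweep produces the
   increment that is added to x.
*/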
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSOR_MPIAIJ"
1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1594 {
1595   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1596   PetscErrorCode ierr;
1597   Vec            bb1 = 0;
1598   PetscBool      hasop;
1599 
1600   PetscFunctionBegin;
1601   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1602     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1603   }
1604 
1605   if (flag == SOR_APPLY_UPPER) {
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607     PetscFunctionReturn(0);
1608   }
1609 
1610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1611     if (flag & SOR_ZERO_INITIAL_GUESS) {
1612       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1613       its--;
1614     }
1615 
1616     while (its--) {
1617       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1619 
1620       /* update rhs: bb1 = bb - B*x */
1621       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1622       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1623 
1624       /* local sweep */
1625       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1626     }
1627   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1628     if (flag & SOR_ZERO_INITIAL_GUESS) {
1629       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1630       its--;
1631     }
1632     while (its--) {
1633       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1635 
1636       /* update rhs: bb1 = bb - B*x */
1637       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1638       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1639 
1640       /* local sweep */
1641       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1642     }
1643   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1644     if (flag & SOR_ZERO_INITIAL_GUESS) {
1645       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1646       its--;
1647     }
1648     while (its--) {
1649       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1651 
1652       /* update rhs: bb1 = bb - B*x */
1653       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1654       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1655 
1656       /* local sweep */
1657       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1658     }
1659   }  else if (flag & SOR_EISENSTAT) {
1660     Vec         xx1;
1661 
1662     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1663     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1664 
1665     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667     if (!mat->diag) {
1668       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1669       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1670     }
1671     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1672     if (hasop) {
1673       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1674     } else {
1675       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1676     }
1677     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1678 
1679     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1680 
1681     /* local sweep */
1682     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1683     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1684     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1685   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1686 
1687   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
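/*
   MatPermute for MPIAIJ: the row permutation (which users may pass as a serial IS) is
   made collective with ISOnComm(), gathered into a global permutation, and inverted;
   the locally owned slice of the inverse gives the source row for each target row.
   The column permutation is purely local and is likewise inverted.  The permuted
   matrix is then extracted with MatGetSubMatrix_MPIAIJ_Private().
*/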
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatPermute_MPIAIJ"
1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1694 {
1695   MPI_Comm       comm;
1696   PetscInt       first,local_rowsize,local_colsize;
1697   const PetscInt *rows;
1698   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1703   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1704   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1705   /* collect the global row permutation and invert it */
1706   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1707   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1708   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1710   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718   /* the column permutation is so much easier;
1719      make a local version of 'colp' and invert it */
1720   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1721   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1722   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1724   /* now we just get the submatrix */
1725   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1726   /* clean up */
1727   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1728   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735 {
1736   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1737   Mat            A = mat->A,B = mat->B;
1738   PetscErrorCode ierr;
1739   PetscReal      isend[5],irecv[5];
1740 
1741   PetscFunctionBegin;
1742   info->block_size     = 1.0;
1743   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1744   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1745   isend[3] = info->memory;  isend[4] = info->mallocs;
1746   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1747   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1748   isend[3] += info->memory;  isend[4] += info->mallocs;
1749   if (flag == MAT_LOCAL) {
1750     info->nz_used      = isend[0];
1751     info->nz_allocated = isend[1];
1752     info->nz_unneeded  = isend[2];
1753     info->memory       = isend[3];
1754     info->mallocs      = isend[4];
1755   } else if (flag == MAT_GLOBAL_MAX) {
1756     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1757     info->nz_used      = irecv[0];
1758     info->nz_allocated = irecv[1];
1759     info->nz_unneeded  = irecv[2];
1760     info->memory       = irecv[3];
1761     info->mallocs      = irecv[4];
1762   } else if (flag == MAT_GLOBAL_SUM) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   }
1770   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1771   info->fill_ratio_needed = 0;
1772   info->factor_mallocs    = 0;
1773 
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatSetOption_MPIAIJ"
1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1780 {
1781   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1782   PetscErrorCode ierr;
1783 
1784   PetscFunctionBegin;
1785   switch (op) {
1786   case MAT_NEW_NONZERO_LOCATIONS:
1787   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1788   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1789   case MAT_KEEP_NONZERO_PATTERN:
1790   case MAT_NEW_NONZERO_LOCATION_ERR:
1791   case MAT_USE_INODES:
1792   case MAT_IGNORE_ZERO_ENTRIES:
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_ROW_ORIENTED:
1797     a->roworiented = flg;
1798     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1799     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1800     break;
1801   case MAT_NEW_DIAGONALS:
1802     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1803     break;
1804   case MAT_IGNORE_OFF_PROC_ENTRIES:
1805     a->donotstash = flg;
1806     break;
1807   case MAT_SPD:
1808     A->spd_set                         = PETSC_TRUE;
1809     A->spd                             = flg;
1810     if (flg) {
1811       A->symmetric                     = PETSC_TRUE;
1812       A->structurally_symmetric        = PETSC_TRUE;
1813       A->symmetric_set                 = PETSC_TRUE;
1814       A->structurally_symmetric_set    = PETSC_TRUE;
1815     }
1816     break;
1817   case MAT_SYMMETRIC:
1818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_STRUCTURALLY_SYMMETRIC:
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     break;
1823   case MAT_HERMITIAN:
1824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1825     break;
1826   case MAT_SYMMETRY_ETERNAL:
1827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1828     break;
1829   case DIAGBLOCK_MAT_CSR:
1830   case OFFDIAGBLOCK_MAT_CSR:
1831   case MAT_CSR:
1832   case DIAGBLOCK_MAT_DIA:
1833   case OFFDIAGBLOCK_MAT_DIA:
1834   case MAT_DIA:
1835   case DIAGBLOCK_MAT_ELL:
1836   case OFFDIAGBLOCK_MAT_ELL:
1837   case MAT_ELL:
1838   case DIAGBLOCK_MAT_HYB:
1839   case OFFDIAGBLOCK_MAT_HYB:
1840   case MAT_HYB:
1841     /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */
1842     break;
1843 
1844   default:
1845     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1846   }
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatGetRow_MPIAIJ"
1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1855   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1856   PetscErrorCode ierr;
1857   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1858   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1859   PetscInt       *cmap,*idx_p;
1860 
1861   PetscFunctionBegin;
1862   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1863   mat->getrowactive = PETSC_TRUE;
1864 
1865   if (!mat->rowvalues && (idx || v)) {
1866     /*
1867         allocate enough space to hold information from the longest row.
1868     */
1869     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1870     PetscInt   max = 1,tmp;
1871     for (i=0; i<matin->rmap->n; i++) {
1872       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1873       if (max < tmp) { max = tmp; }
1874     }
1875     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1876   }
1877 
1878   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1879   lrow = row - rstart;
1880 
1881   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1882   if (!v)   {pvA = 0; pvB = 0;}
1883   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1884   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1885   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1886   nztot = nzA + nzB;
1887 
1888   cmap  = mat->garray;
1889   if (v  || idx) {
1890     if (nztot) {
1891       /* Sort by increasing column numbers, assuming A and B already sorted */
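      /* Three-way merge: first the B (off-diagonal) entries whose global column is
         below cstart, then all A (diagonal block) entries shifted by cstart, then the
         remaining B entries; since each list is sorted this yields globally sorted
         columns.  E.g. with cstart=10, local A columns {0,2} and B global columns
         {3,15}, the returned row is ordered 3,10,12,15; imark records the B split. */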
1892       PetscInt imark = -1;
1893       if (v) {
1894         *v = v_p = mat->rowvalues;
1895         for (i=0; i<nzB; i++) {
1896           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1897           else break;
1898         }
1899         imark = i;
1900         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1901         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1902       }
1903       if (idx) {
1904         *idx = idx_p = mat->rowindices;
1905         if (imark > -1) {
1906           for (i=0; i<imark; i++) {
1907             idx_p[i] = cmap[cworkB[i]];
1908           }
1909         } else {
1910           for (i=0; i<nzB; i++) {
1911             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1912             else break;
1913           }
1914           imark = i;
1915         }
1916         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1917         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1918       }
1919     } else {
1920       if (idx) *idx = 0;
1921       if (v)   *v   = 0;
1922     }
1923   }
1924   *nz = nztot;
1925   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1933 {
1934   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1935 
1936   PetscFunctionBegin;
1937   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1938   aij->getrowactive = PETSC_FALSE;
1939   PetscFunctionReturn(0);
1940 }
1941 
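/*
   MatNorm reduces local contributions from the diagonal (A) and off-diagonal (B) blocks:
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2), local sums of |a_ij|^2 combined with MPIU_SUM;
     NORM_1:         max_j sum_i |a_ij|, per-column absolute sums accumulated into an
                     array of global length cmap->N, summed across ranks, then maximized;
     NORM_INFINITY:  max_i sum_j |a_ij|, local row sums reduced with MPIU_MAX.
   The 2-norm (largest singular value) cannot be computed entrywise and is not supported.
*/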
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatNorm_MPIAIJ"
1944 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1945 {
1946   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1947   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1948   PetscErrorCode ierr;
1949   PetscInt       i,j,cstart = mat->cmap->rstart;
1950   PetscReal      sum = 0.0;
1951   MatScalar      *v;
1952 
1953   PetscFunctionBegin;
1954   if (aij->size == 1) {
1955     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1956   } else {
1957     if (type == NORM_FROBENIUS) {
1958       v = amat->a;
1959       for (i=0; i<amat->nz; i++) {
1960 #if defined(PETSC_USE_COMPLEX)
1961         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1962 #else
1963         sum += (*v)*(*v); v++;
1964 #endif
1965       }
1966       v = bmat->a;
1967       for (i=0; i<bmat->nz; i++) {
1968 #if defined(PETSC_USE_COMPLEX)
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970 #else
1971         sum += (*v)*(*v); v++;
1972 #endif
1973       }
1974       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1975       *norm = PetscSqrtReal(*norm);
1976     } else if (type == NORM_1) { /* max column norm */
1977       PetscReal *tmp,*tmp2;
1978       PetscInt  *jj,*garray = aij->garray;
1979       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1980       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1981       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1982       *norm = 0.0;
1983       v = amat->a; jj = amat->j;
1984       for (j=0; j<amat->nz; j++) {
1985         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1986       }
1987       v = bmat->a; jj = bmat->j;
1988       for (j=0; j<bmat->nz; j++) {
1989         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1990       }
1991       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1992       for (j=0; j<mat->cmap->N; j++) {
1993         if (tmp2[j] > *norm) *norm = tmp2[j];
1994       }
1995       ierr = PetscFree(tmp);CHKERRQ(ierr);
1996       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1997     } else if (type == NORM_INFINITY) { /* max row norm */
1998       PetscReal ntemp = 0.0;
1999       for (j=0; j<aij->A->rmap->n; j++) {
2000         v = amat->a + amat->i[j];
2001         sum = 0.0;
2002         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2003           sum += PetscAbsScalar(*v); v++;
2004         }
2005         v = bmat->a + bmat->i[j];
2006         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2007           sum += PetscAbsScalar(*v); v++;
2008         }
2009         if (sum > ntemp) ntemp = sum;
2010       }
2011       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2012     } else {
2013       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2014     }
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatTranspose_MPIAIJ"
2021 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2022 {
2023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2024   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2025   PetscErrorCode ierr;
2026   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2027   PetscInt       cstart=A->cmap->rstart,ncol;
2028   Mat            B;
2029   MatScalar      *array;
2030 
2031   PetscFunctionBegin;
2032   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2033 
2034   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2035   ai = Aloc->i; aj = Aloc->j;
2036   bi = Bloc->i; bj = Bloc->j;
2037   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2038     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2039     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2040     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2041     for (i=0; i<ai[ma]; i++){
2042       d_nnz[aj[i]]++;
2043       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2044     }
2045 
2046     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2047     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2048     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2049     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2050     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2051     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2052     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2053   } else {
2054     B = *matout;
2055   }
2056 
2057   /* copy over the A part */
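  /* Row i of the local diagonal block becomes global column rstart+i of B: MatSetValues()
     is called with the index arrays swapped, i.e. ncol "rows" (the former global column
     indices in aj) and a single "column" (the former row). */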
2058   array = Aloc->a;
2059   row = A->rmap->rstart;
2060   for (i=0; i<ma; i++) {
2061     ncol = ai[i+1]-ai[i];
2062     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2063     row++; array += ncol; aj += ncol;
2064   }
2065   aj = Aloc->j;
2066   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2067 
2068   /* copy over the B part */
2069   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2070   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2071   array = Bloc->a;
2072   row = A->rmap->rstart;
2073   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2074   cols_tmp = cols;
2075   for (i=0; i<mb; i++) {
2076     ncol = bi[i+1]-bi[i];
2077     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2078     row++; array += ncol; cols_tmp += ncol;
2079   }
2080   ierr = PetscFree(cols);CHKERRQ(ierr);
2081 
2082   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2085     *matout = B;
2086   } else {
2087     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2088   }
2089   PetscFunctionReturn(0);
2090 }
2091 
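/*
   MatDiagonalScale computes mat = diag(ll)*mat*diag(rr).  Left scaling is purely local
   (row i of both blocks is multiplied by ll[i]).  Right scaling of the off-diagonal
   block needs the ghost entries of rr, so the scatter of rr into lvec is started first,
   the diagonal block is scaled while the messages are in flight, and the off-diagonal
   block is right-scaled once the scatter completes.
*/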
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2094 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2095 {
2096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2097   Mat            a = aij->A,b = aij->B;
2098   PetscErrorCode ierr;
2099   PetscInt       s1,s2,s3;
2100 
2101   PetscFunctionBegin;
2102   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2103   if (rr) {
2104     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2105     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2106     /* Overlap communication with computation. */
2107     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2108   }
2109   if (ll) {
2110     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2111     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2112     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2113   }
2114   /* scale  the diagonal block */
2115   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2116 
2117   if (rr) {
2118     /* Do a scatter end and then right scale the off-diagonal block */
2119     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2120     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2121   }
2122 
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 #undef __FUNCT__
2127 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2128 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2129 {
2130   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2131   PetscErrorCode ierr;
2132 
2133   PetscFunctionBegin;
2134   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatEqual_MPIAIJ"
2140 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2141 {
2142   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2143   Mat            a,b,c,d;
2144   PetscBool      flg;
2145   PetscErrorCode ierr;
2146 
2147   PetscFunctionBegin;
2148   a = matA->A; b = matA->B;
2149   c = matB->A; d = matB->B;
2150 
2151   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2152   if (flg) {
2153     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2154   }
2155   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "MatCopy_MPIAIJ"
2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2162 {
2163   PetscErrorCode ierr;
2164   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2165   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2166 
2167   PetscFunctionBegin;
2168   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2169   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2170     /* because of the column compression in the off-processor part of the matrix a->B,
2171        the number of columns in a->B and b->B may be different, hence we cannot call
2172        the MatCopy() directly on the two parts. If need be, we can provide a more
2173        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2174        then copying the submatrices */
2175     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2176   } else {
2177     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2178     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2179   }
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 #undef __FUNCT__
2184 #define __FUNCT__ "MatSetUp_MPIAIJ"
2185 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2186 {
2187   PetscErrorCode ierr;
2188 
2189   PetscFunctionBegin;
2190   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2196 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
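/* For each row the number of nonzeros of X+Y is the size of the union of the two
   (globally mapped, sorted) column lists, found by a two-pointer merge; e.g. row
   columns {1,4,7} in one matrix and {4,9} in the other give nnz = 4 since the
   duplicate column 4 is counted once. */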
2197 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2198 {
2199   PetscInt          i,m=Y->rmap->N;
2200   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2201   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2202   const PetscInt    *xi = x->i,*yi = y->i;
2203 
2204   PetscFunctionBegin;
2205   /* Set the number of nonzeros in the new matrix */
2206   for (i=0; i<m; i++) {
2207     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2208     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2209     nnz[i] = 0;
2210     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2211       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2212       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2213       nnz[i]++;
2214     }
2215     for (; k<nzy; k++) nnz[i]++;
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatAXPY_MPIAIJ"
2222 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2223 {
2224   PetscErrorCode ierr;
2225   PetscInt       i;
2226   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2227   PetscBLASInt   bnz,one=1;
2228   Mat_SeqAIJ     *x,*y;
2229 
2230   PetscFunctionBegin;
2231   if (str == SAME_NONZERO_PATTERN) {
2232     PetscScalar alpha = a;
2233     x = (Mat_SeqAIJ *)xx->A->data;
2234     y = (Mat_SeqAIJ *)yy->A->data;
2235     bnz = PetscBLASIntCast(x->nz);
2236     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2237     x = (Mat_SeqAIJ *)xx->B->data;
2238     y = (Mat_SeqAIJ *)yy->B->data;
2239     bnz = PetscBLASIntCast(x->nz);
2240     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2241   } else if (str == SUBSET_NONZERO_PATTERN) {
2242     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2243 
2244     x = (Mat_SeqAIJ *)xx->B->data;
2245     y = (Mat_SeqAIJ *)yy->B->data;
2246     if (y->xtoy && y->XtoY != xx->B) {
2247       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2248       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2249     }
2250     if (!y->xtoy) { /* get xtoy */
2251       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2252       y->XtoY = xx->B;
2253       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2254     }
2255     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2256   } else {
2257     Mat B;
2258     PetscInt *nnz_d,*nnz_o;
2259     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2260     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2261     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2262     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2263     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2264     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2265     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2266     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2267     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2268     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2269     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2270     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2271     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2272     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2273   }
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatConjugate_MPIAIJ"
2281 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2282 {
2283 #if defined(PETSC_USE_COMPLEX)
2284   PetscErrorCode ierr;
2285   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2289   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2290 #else
2291   PetscFunctionBegin;
2292 #endif
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatRealPart_MPIAIJ"
2298 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2299 {
2300   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2301   PetscErrorCode ierr;
2302 
2303   PetscFunctionBegin;
2304   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2305   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2311 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2312 {
2313   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2318   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #ifdef PETSC_HAVE_PBGL
2323 
2324 #include <boost/parallel/mpi/bsp_process_group.hpp>
2325 #include <boost/graph/distributed/ilu_default_graph.hpp>
2326 #include <boost/graph/distributed/ilu_0_block.hpp>
2327 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2328 #include <boost/graph/distributed/petsc/interface.hpp>
2329 #include <boost/multi_array.hpp>
2330 #include <boost/parallel/distributed_property_map.hpp>
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2334 /*
2335   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2336 */
2337 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2338 {
2339   namespace petsc = boost::distributed::petsc;
2340 
2341   namespace graph_dist = boost::graph::distributed;
2342   using boost::graph::distributed::ilu_default::process_group_type;
2343   using boost::graph::ilu_permuted;
2344 
2345   PetscBool       row_identity, col_identity;
2346   PetscContainer  c;
2347   PetscInt        m, n, M, N;
2348   PetscErrorCode  ierr;
2349 
2350   PetscFunctionBegin;
2351   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2352   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2353   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2354   if (!row_identity || !col_identity) {
2355     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2356   }
2357 
2358   process_group_type pg;
2359   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2360   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2361   lgraph_type&   level_graph = *lgraph_p;
2362   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2363 
2364   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2365   ilu_permuted(level_graph);
2366 
2367   /* put together the new matrix */
2368   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2369   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2370   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2371   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2372   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2373   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2374   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2375   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2376 
2377   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2378   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2379   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2380   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2386 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2387 {
2388   PetscFunctionBegin;
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 #undef __FUNCT__
2393 #define __FUNCT__ "MatSolve_MPIAIJ"
2394 /*
2395   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2396 */
2397 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2398 {
2399   namespace graph_dist = boost::graph::distributed;
2400 
2401   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2402   lgraph_type*   lgraph_p;
2403   PetscContainer c;
2404   PetscErrorCode ierr;
2405 
2406   PetscFunctionBegin;
2407   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2408   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2409   ierr = VecCopy(b, x);CHKERRQ(ierr);
2410 
2411   PetscScalar* array_x;
2412   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2413   PetscInt sx;
2414   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2415 
2416   PetscScalar* array_b;
2417   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2418   PetscInt sb;
2419   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2420 
2421   lgraph_type&   level_graph = *lgraph_p;
2422   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2423 
2424   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2425   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2426                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2427 
2428   typedef boost::iterator_property_map<array_ref_type::iterator,
2429                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2430   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2431                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2432 
2433   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2434 
2435   PetscFunctionReturn(0);
2436 }
2437 #endif
2438 
2439 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2440   PetscInt       nzlocal,nsends,nrecvs;
2441   PetscMPIInt    *send_rank,*recv_rank;
2442   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2443   PetscScalar    *sbuf_a,**rbuf_a;
2444   PetscErrorCode (*Destroy)(Mat);
2445 } Mat_Redundant;
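/*
   A Mat_Redundant is built on the first MatGetRedundantMatrix() call (MAT_INITIAL_MATRIX)
   and stashed on the resulting matrix inside a PetscContainer under the key
   "Mat_Redundant" (queried by the MAT_REUSE_MATRIX path below), so a reuse call can
   recycle the communication pattern and buffers and only re-send the numerical values.
*/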
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2449 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2450 {
2451   PetscErrorCode       ierr;
2452   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2453   PetscInt             i;
2454 
2455   PetscFunctionBegin;
2456   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2457   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2458   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2459   for (i=0; i<redund->nrecvs; i++){
2460     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2461     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2462   }
2463   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2464   ierr = PetscFree(redund);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatDestroy_MatRedundant"
2470 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2471 {
2472   PetscErrorCode  ierr;
2473   PetscContainer  container;
2474   Mat_Redundant   *redund=PETSC_NULL;
2475 
2476   PetscFunctionBegin;
2477   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2478   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2479   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2480   A->ops->destroy = redund->Destroy;
2481   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2482   if (A->ops->destroy) {
2483     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2484   }
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 #undef __FUNCT__
2489 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2490 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2491 {
2492   PetscMPIInt    rank,size;
2493   MPI_Comm       comm=((PetscObject)mat)->comm;
2494   PetscErrorCode ierr;
2495   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2496   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2497   PetscInt       *rowrange=mat->rmap->range;
2498   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2499   Mat            A=aij->A,B=aij->B,C=*matredundant;
2500   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2501   PetscScalar    *sbuf_a;
2502   PetscInt       nzlocal=a->nz+b->nz;
2503   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2504   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2505   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2506   MatScalar      *aworkA,*aworkB;
2507   PetscScalar    *vals;
2508   PetscMPIInt    tag1,tag2,tag3,imdex;
2509   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2510                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2511   MPI_Status     recv_status,*send_status;
2512   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2513   PetscInt       **rbuf_j=PETSC_NULL;
2514   PetscScalar    **rbuf_a=PETSC_NULL;
2515   Mat_Redundant  *redund=PETSC_NULL;
2516   PetscContainer container;
2517 
2518   PetscFunctionBegin;
2519   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2521 
2522   if (reuse == MAT_REUSE_MATRIX) {
2523     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2524     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2525     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2526     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2527     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2528     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2529     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2530     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2531 
2532     nsends    = redund->nsends;
2533     nrecvs    = redund->nrecvs;
2534     send_rank = redund->send_rank;
2535     recv_rank = redund->recv_rank;
2536     sbuf_nz   = redund->sbuf_nz;
2537     rbuf_nz   = redund->rbuf_nz;
2538     sbuf_j    = redund->sbuf_j;
2539     sbuf_a    = redund->sbuf_a;
2540     rbuf_j    = redund->rbuf_j;
2541     rbuf_a    = redund->rbuf_a;
2542   }
2543 
2544   if (reuse == MAT_INITIAL_MATRIX){
2545     PetscMPIInt  subrank,subsize;
2546     PetscInt     nleftover,np_subcomm;
2547     /* get the destination processors' id send_rank, nsends and nrecvs */
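    /* This assumes the subcommunicator layout in which rank i has subrank i/nsubcomm,
       as the test below encodes; ranks sharing a subrank own the same row block in
       their respective subcommunicators and exchange their local rows pairwise.  E.g.
       size=6, nsubcomm=2: ranks {0,1}, {2,3}, {4,5} have subranks 0,1,2 and exchange
       within each pair.  Leftover ranks (when nsubcomm does not divide size) instead
       send their rows to the final ranks of the regular subcommunicators. */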
2548     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2549     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2550     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2551     np_subcomm = size/nsubcomm;
2552     nleftover  = size - nsubcomm*np_subcomm;
2553     nsends = 0; nrecvs = 0;
2554     for (i=0; i<size; i++){ /* i=rank*/
2555       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2556         send_rank[nsends] = i; nsends++;
2557         recv_rank[nrecvs++] = i;
2558       }
2559     }
2560     if (rank >= size - nleftover){/* this proc is a leftover processor */
2561       i = size-nleftover-1;
2562       j = 0;
2563       while (j < nsubcomm - nleftover){
2564         send_rank[nsends++] = i;
2565         i--; j++;
2566       }
2567     }
2568 
2569     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2570       for (i=0; i<nleftover; i++){
2571         recv_rank[nrecvs++] = size-nleftover+i;
2572       }
2573     }
2574 
2575     /* allocate sbuf_j, sbuf_a */
2576     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2577     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2578     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2579   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2580 
2581   /* copy mat's local entries into the buffers */
2582   if (reuse == MAT_INITIAL_MATRIX){
2583     rownz_max = 0;
2584     rptr = sbuf_j;
2585     cols = sbuf_j + rend-rstart + 1;
2586     vals = sbuf_a;
2587     rptr[0] = 0;
2588     for (i=0; i<rend-rstart; i++){
2589       row = i + rstart;
2590       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2591       ncols  = nzA + nzB;
2592       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2593       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2594       /* load the column indices for this row into cols */
2595       lwrite = 0;
2596       for (l=0; l<nzB; l++) {
2597         if ((ctmp = bmap[cworkB[l]]) < cstart){
2598           vals[lwrite]   = aworkB[l];
2599           cols[lwrite++] = ctmp;
2600         }
2601       }
2602       for (l=0; l<nzA; l++){
2603         vals[lwrite]   = aworkA[l];
2604         cols[lwrite++] = cstart + cworkA[l];
2605       }
2606       for (l=0; l<nzB; l++) {
2607         if ((ctmp = bmap[cworkB[l]]) >= cend){
2608           vals[lwrite]   = aworkB[l];
2609           cols[lwrite++] = ctmp;
2610         }
2611       }
2612       vals += ncols;
2613       cols += ncols;
2614       rptr[i+1] = rptr[i] + ncols;
2615       if (rownz_max < ncols) rownz_max = ncols;
2616     }
2617     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2618   } else { /* only copy matrix values into sbuf_a */
2619     rptr = sbuf_j;
2620     vals = sbuf_a;
2621     rptr[0] = 0;
2622     for (i=0; i<rend-rstart; i++){
2623       row = i + rstart;
2624       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2625       ncols  = nzA + nzB;
2626       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2627       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2628       lwrite = 0;
2629       for (l=0; l<nzB; l++) {
2630         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2631       }
2632       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2633       for (l=0; l<nzB; l++) {
2634         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2635       }
2636       vals += ncols;
2637       rptr[i+1] = rptr[i] + ncols;
2638     }
2639   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2640 
2641   /* send nzlocal to others, and recv other's nzlocal */
2642   /*--------------------------------------------------*/
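  /* For MAT_INITIAL_MATRIX a single allocation backs all six request arrays,
     partitioned as [s_waits3 | s_waits2 | s_waits1 | r_waits1 | r_waits2 | r_waits3];
     for reuse only s_waits3 and r_waits3 are needed. */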
2643   if (reuse == MAT_INITIAL_MATRIX){
2644     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2645     s_waits2 = s_waits3 + nsends;
2646     s_waits1 = s_waits2 + nsends;
2647     r_waits1 = s_waits1 + nsends;
2648     r_waits2 = r_waits1 + nrecvs;
2649     r_waits3 = r_waits2 + nrecvs;
2650   } else {
2651     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2652     r_waits3 = s_waits3 + nsends;
2653   }
2654 
2655   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2656   if (reuse == MAT_INITIAL_MATRIX){
2657     /* get new tags to keep the communication clean */
2658     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2659     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2660     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2661 
2662     /* post receives of other's nzlocal */
2663     for (i=0; i<nrecvs; i++){
2664       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2665     }
2666     /* send nzlocal to others */
2667     for (i=0; i<nsends; i++){
2668       sbuf_nz[i] = nzlocal;
2669       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2670     }
2671     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2672     count = nrecvs;
2673     while (count) {
2674       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2675       recv_rank[imdex] = recv_status.MPI_SOURCE;
2676       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2677       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2678 
2679       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2680       rbuf_nz[imdex] += i + 2;
2681       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2682       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2683       count--;
2684     }
2685     /* wait on sends of nzlocal */
2686     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2687     /* send mat->i,j to others, and recv from other's */
2688     /*------------------------------------------------*/
2689     for (i=0; i<nsends; i++){
2690       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2691       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2692     }
2693     /* wait on receives of mat->i,j */
2694     /*------------------------------*/
2695     count = nrecvs;
2696     while (count) {
2697       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2698       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2699       count--;
2700     }
2701     /* wait on sends of mat->i,j */
2702     /*---------------------------*/
2703     if (nsends) {
2704       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2705     }
2706   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2707 
2708   /* post receives, send and receive mat->a */
2709   /*----------------------------------------*/
2710   for (imdex=0; imdex<nrecvs; imdex++) {
2711     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2712   }
2713   for (i=0; i<nsends; i++){
2714     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2715   }
2716   count = nrecvs;
2717   while (count) {
2718     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2719     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2720     count--;
2721   }
2722   if (nsends) {
2723     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2724   }
2725 
2726   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2727 
2728   /* create redundant matrix */
2729   /*-------------------------*/
2730   if (reuse == MAT_INITIAL_MATRIX){
2731     /* compute rownz_max for preallocation */
2732     for (imdex=0; imdex<nrecvs; imdex++){
2733       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2734       rptr = rbuf_j[imdex];
2735       for (i=0; i<j; i++){
2736         ncols = rptr[i+1] - rptr[i];
2737         if (rownz_max < ncols) rownz_max = ncols;
2738       }
2739     }
2740 
2741     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2742     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2743     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2744     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2745     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2746     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2747   } else {
2748     C = *matredundant;
2749   }
2750 
2751   /* insert local matrix entries */
2752   rptr = sbuf_j;
2753   cols = sbuf_j + rend-rstart + 1;
2754   vals = sbuf_a;
2755   for (i=0; i<rend-rstart; i++){
2756     row   = i + rstart;
2757     ncols = rptr[i+1] - rptr[i];
2758     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2759     vals += ncols;
2760     cols += ncols;
2761   }
2762   /* insert received matrix entries */
2763   for (imdex=0; imdex<nrecvs; imdex++){
2764     rstart = rowrange[recv_rank[imdex]];
2765     rend   = rowrange[recv_rank[imdex]+1];
2766     rptr = rbuf_j[imdex];
2767     cols = rbuf_j[imdex] + rend-rstart + 1;
2768     vals = rbuf_a[imdex];
2769     for (i=0; i<rend-rstart; i++){
2770       row   = i + rstart;
2771       ncols = rptr[i+1] - rptr[i];
2772       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2773       vals += ncols;
2774       cols += ncols;
2775     }
2776   }
2777   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2778   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2779   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2780   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2781   if (reuse == MAT_INITIAL_MATRIX) {
2782     PetscContainer container;
2783     *matredundant = C;
2784     /* create a supporting struct and attach it to C for reuse */
2785     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2786     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2787     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2788     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2789     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2790     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2791 
2792     redund->nzlocal = nzlocal;
2793     redund->nsends  = nsends;
2794     redund->nrecvs  = nrecvs;
2795     redund->send_rank = send_rank;
2796     redund->recv_rank = recv_rank;
2797     redund->sbuf_nz = sbuf_nz;
2798     redund->rbuf_nz = rbuf_nz;
2799     redund->sbuf_j  = sbuf_j;
2800     redund->sbuf_a  = sbuf_a;
2801     redund->rbuf_j  = rbuf_j;
2802     redund->rbuf_a  = rbuf_a;
2803 
2804     redund->Destroy = C->ops->destroy;
2805     C->ops->destroy = MatDestroy_MatRedundant;
2806   }
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2812 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2813 {
2814   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2815   PetscErrorCode ierr;
2816   PetscInt       i,*idxb = 0;
2817   PetscScalar    *va,*vb;
2818   Vec            vtmp;
2819 
2820   PetscFunctionBegin;
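  /* Take the row maxima of the diagonal block, then of the off-diagonal block in a
     work vector, and keep whichever entry has the larger magnitude; off-diagonal
     column indices are mapped back to global numbering through a->garray. */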
2821   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2822   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2823   if (idx) {
2824     for (i=0; i<A->rmap->n; i++) {
2825       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2826     }
2827   }
2828 
2829   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2830   if (idx) {
2831     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2832   }
2833   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2834   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2835 
2836   for (i=0; i<A->rmap->n; i++){
2837     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2838       va[i] = vb[i];
2839       if (idx) idx[i] = a->garray[idxb[i]];
2840     }
2841   }
2842 
2843   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2844   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2845   ierr = PetscFree(idxb);CHKERRQ(ierr);
2846   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 #undef __FUNCT__
2851 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2852 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2853 {
2854   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2855   PetscErrorCode ierr;
2856   PetscInt       i,*idxb = 0;
2857   PetscScalar    *va,*vb;
2858   Vec            vtmp;
2859 
2860   PetscFunctionBegin;
2861   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2862   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2863   if (idx) {
2864     for (i=0; i<A->rmap->n; i++) {
2865       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2866     }
2867   }
2868 
2869   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2870   if (idx) {
2871     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2872   }
2873   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2874   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2875 
2876   for (i=0; i<A->rmap->n; i++){
2877     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2878       va[i] = vb[i];
2879       if (idx) idx[i] = a->garray[idxb[i]];
2880     }
2881   }
2882 
2883   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2884   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2885   ierr = PetscFree(idxb);CHKERRQ(ierr);
2886   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 #undef __FUNCT__
2891 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2892 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2893 {
2894   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2895   PetscInt       n      = A->rmap->n;
2896   PetscInt       cstart = A->cmap->rstart;
2897   PetscInt      *cmap   = mat->garray;
2898   PetscInt      *diagIdx, *offdiagIdx;
2899   Vec            diagV, offdiagV;
2900   PetscScalar   *a, *diagA, *offdiagA;
2901   PetscInt       r;
2902   PetscErrorCode ierr;
2903 
2904   PetscFunctionBegin;
2905   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2906   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2907   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2908   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2909   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2910   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2911   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2912   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2913   for (r = 0; r < n; ++r) {
2914     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2915       a[r]   = diagA[r];
2916       if (idx) idx[r] = cstart + diagIdx[r];
2917     } else {
2918       a[r]   = offdiagA[r];
2919       if (idx) idx[r] = cmap[offdiagIdx[r]];
2920     }
2921   }
2922   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2923   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2924   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2925   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2926   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2927   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2928   PetscFunctionReturn(0);
2929 }
2930 
2931 #undef __FUNCT__
2932 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2933 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2934 {
2935   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2936   PetscInt       n      = A->rmap->n;
2937   PetscInt       cstart = A->cmap->rstart;
2938   PetscInt      *cmap   = mat->garray;
2939   PetscInt      *diagIdx, *offdiagIdx;
2940   Vec            diagV, offdiagV;
2941   PetscScalar   *a, *diagA, *offdiagA;
2942   PetscInt       r;
2943   PetscErrorCode ierr;
2944 
2945   PetscFunctionBegin;
2946   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2947   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2948   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2949   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2950   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2951   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2952   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2953   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2954   for (r = 0; r < n; ++r) {
2955     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2956       a[r]   = diagA[r];
2957       if (idx) idx[r] = cstart + diagIdx[r];
2958     } else {
2959       a[r]   = offdiagA[r];
2960       if (idx) idx[r] = cmap[offdiagIdx[r]];
2961     }
2962   }
2963   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2964   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2965   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2966   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2967   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2968   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2974 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2975 {
2976   PetscErrorCode ierr;
2977   Mat            *dummy;
2978 
2979   PetscFunctionBegin;
2980   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2981   *newmat = *dummy;
2982   ierr = PetscFree(dummy);CHKERRQ(ierr);
2983   PetscFunctionReturn(0);
2984 }
2985 
2986 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2987 
2988 #undef __FUNCT__
2989 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2990 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2991 {
2992   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2993   PetscErrorCode ierr;
2994 
2995   PetscFunctionBegin;
2996   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2997   PetscFunctionReturn(0);
2998 }
2999 
3000 
3001 /* -------------------------------------------------------------------*/
3002 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3003        MatGetRow_MPIAIJ,
3004        MatRestoreRow_MPIAIJ,
3005        MatMult_MPIAIJ,
3006 /* 4*/ MatMultAdd_MPIAIJ,
3007        MatMultTranspose_MPIAIJ,
3008        MatMultTransposeAdd_MPIAIJ,
3009 #ifdef PETSC_HAVE_PBGL
3010        MatSolve_MPIAIJ,
3011 #else
3012        0,
3013 #endif
3014        0,
3015        0,
3016 /*10*/ 0,
3017        0,
3018        0,
3019        MatSOR_MPIAIJ,
3020        MatTranspose_MPIAIJ,
3021 /*15*/ MatGetInfo_MPIAIJ,
3022        MatEqual_MPIAIJ,
3023        MatGetDiagonal_MPIAIJ,
3024        MatDiagonalScale_MPIAIJ,
3025        MatNorm_MPIAIJ,
3026 /*20*/ MatAssemblyBegin_MPIAIJ,
3027        MatAssemblyEnd_MPIAIJ,
3028        MatSetOption_MPIAIJ,
3029        MatZeroEntries_MPIAIJ,
3030 /*24*/ MatZeroRows_MPIAIJ,
3031        0,
3032 #ifdef PETSC_HAVE_PBGL
3033        0,
3034 #else
3035        0,
3036 #endif
3037        0,
3038        0,
3039 /*29*/ MatSetUp_MPIAIJ,
3040 #ifdef PETSC_HAVE_PBGL
3041        0,
3042 #else
3043        0,
3044 #endif
3045        0,
3046        0,
3047        0,
3048 /*34*/ MatDuplicate_MPIAIJ,
3049        0,
3050        0,
3051        0,
3052        0,
3053 /*39*/ MatAXPY_MPIAIJ,
3054        MatGetSubMatrices_MPIAIJ,
3055        MatIncreaseOverlap_MPIAIJ,
3056        MatGetValues_MPIAIJ,
3057        MatCopy_MPIAIJ,
3058 /*44*/ MatGetRowMax_MPIAIJ,
3059        MatScale_MPIAIJ,
3060        0,
3061        0,
3062        MatZeroRowsColumns_MPIAIJ,
3063 /*49*/ 0,
3064        0,
3065        0,
3066        0,
3067        0,
3068 /*54*/ MatFDColoringCreate_MPIAIJ,
3069        0,
3070        MatSetUnfactored_MPIAIJ,
3071        MatPermute_MPIAIJ,
3072        0,
3073 /*59*/ MatGetSubMatrix_MPIAIJ,
3074        MatDestroy_MPIAIJ,
3075        MatView_MPIAIJ,
3076        0,
3077        0,
3078 /*64*/ 0,
3079        0,
3080        0,
3081        0,
3082        0,
3083 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3084        MatGetRowMinAbs_MPIAIJ,
3085        0,
3086        MatSetColoring_MPIAIJ,
3087 #if defined(PETSC_HAVE_ADIC)
3088        MatSetValuesAdic_MPIAIJ,
3089 #else
3090        0,
3091 #endif
3092        MatSetValuesAdifor_MPIAIJ,
3093 /*75*/ MatFDColoringApply_AIJ,
3094        0,
3095        0,
3096        0,
3097        0,
3098 /*80*/ 0,
3099        0,
3100        0,
3101 /*83*/ MatLoad_MPIAIJ,
3102        0,
3103        0,
3104        0,
3105        0,
3106        0,
3107 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3108        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3109        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3110        MatPtAP_Basic,
3111        MatPtAPSymbolic_MPIAIJ,
3112 /*94*/ MatPtAPNumeric_MPIAIJ,
3113        0,
3114        0,
3115        0,
3116        0,
3117 /*99*/ 0,
3118        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3119        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3120        MatConjugate_MPIAIJ,
3121        0,
3122 /*104*/MatSetValuesRow_MPIAIJ,
3123        MatRealPart_MPIAIJ,
3124        MatImaginaryPart_MPIAIJ,
3125        0,
3126        0,
3127 /*109*/0,
3128        MatGetRedundantMatrix_MPIAIJ,
3129        MatGetRowMin_MPIAIJ,
3130        0,
3131        0,
3132 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3133        0,
3134        0,
3135        0,
3136        0,
3137 /*119*/0,
3138        0,
3139        0,
3140        0,
3141        MatGetMultiProcBlock_MPIAIJ,
3142 /*124*/MatFindNonZeroRows_MPIAIJ,
3143        MatGetColumnNorms_MPIAIJ,
3144        MatInvertBlockDiagonal_MPIAIJ,
3145        0,
3146        MatGetSubMatricesParallel_MPIAIJ,
3147 /*129*/0,
3148        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3149        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3150        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3151        0,
3152 /*134*/0,
3153        0,
3154        0,
3155        0,
3156        0
3157 };
3158 
3159 /* ----------------------------------------------------------------------------------------*/
3160 
3161 EXTERN_C_BEGIN
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3164 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3165 {
3166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3167   PetscErrorCode ierr;
3168 
3169   PetscFunctionBegin;
3170   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3171   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 EXTERN_C_END
3175 
3176 EXTERN_C_BEGIN
3177 #undef __FUNCT__
3178 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3179 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3180 {
3181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3182   PetscErrorCode ierr;
3183 
3184   PetscFunctionBegin;
3185   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3186   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3187   PetscFunctionReturn(0);
3188 }
3189 EXTERN_C_END
3190 
3191 EXTERN_C_BEGIN
3192 #undef __FUNCT__
3193 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3194 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3195 {
3196   Mat_MPIAIJ     *b;
3197   PetscErrorCode ierr;
3198   PetscInt       i;
3199   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3200 
3201   PetscFunctionBegin;
3202   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3203   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3204   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3205   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3206   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3207   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3208 
3209   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3210   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3211   if (d_nnz) {
3212     for (i=0; i<B->rmap->n; i++) {
3213       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3214     }
3215   }
3216   if (o_nnz) {
3217     for (i=0; i<B->rmap->n; i++) {
3218       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3219     }
3220   }
3221   b = (Mat_MPIAIJ*)B->data;
3222 
3223   if (!B->preallocated) {
3224     /* Explicitly create 2 MATSEQAIJ matrices. */
3225     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3226     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3227     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3228     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3229     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3230     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3231     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3232     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3233   }
3234 
3235   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3236   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3237   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3238   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3239   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3240   B->preallocated = PETSC_TRUE;
3241   PetscFunctionReturn(0);
3242 }
3243 EXTERN_C_END
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3247 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3248 {
3249   Mat            mat;
3250   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3251   PetscErrorCode ierr;
3252 
3253   PetscFunctionBegin;
3254   *newmat       = 0;
3255   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3256   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3257   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3258   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3259   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3260   a    = (Mat_MPIAIJ*)mat->data;
3261 
3262   mat->factortype    = matin->factortype;
3263   mat->rmap->bs      = matin->rmap->bs;
3264   mat->cmap->bs      = matin->cmap->bs;
3265   mat->assembled    = PETSC_TRUE;
3266   mat->insertmode   = NOT_SET_VALUES;
3267   mat->preallocated = PETSC_TRUE;
3268 
3269   a->size           = oldmat->size;
3270   a->rank           = oldmat->rank;
3271   a->donotstash     = oldmat->donotstash;
3272   a->roworiented    = oldmat->roworiented;
3273   a->rowindices     = 0;
3274   a->rowvalues      = 0;
3275   a->getrowactive   = PETSC_FALSE;
3276 
3277   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3278   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3279 
3280   if (oldmat->colmap) {
3281 #if defined (PETSC_USE_CTABLE)
3282     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3283 #else
3284     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3285     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3286     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3287 #endif
3288   } else a->colmap = 0;
3289   if (oldmat->garray) {
3290     PetscInt len;
3291     len  = oldmat->B->cmap->n;
3292     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3293     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3294     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3295   } else a->garray = 0;
3296 
3297   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3298   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3299   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3300   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3301   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3302   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3303   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3304   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3305   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3306   *newmat = mat;
3307   PetscFunctionReturn(0);
3308 }
3309 
3310 
3311 
3312 #undef __FUNCT__
3313 #define __FUNCT__ "MatLoad_MPIAIJ"
3314 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3315 {
3316   PetscScalar    *vals,*svals;
3317   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3318   PetscErrorCode ierr;
3319   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3320   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3321   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3322   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3323   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3324   int            fd;
3325 
3326   PetscFunctionBegin;
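  /* Process 0 reads the header, row lengths, column indices, and numerical values
     from the binary file and ships each other process its piece; every process then
     preallocates from its row lengths and inserts its own rows. */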
3327   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3328   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3329   if (!rank) {
3330     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3331     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3332     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3333   }
3334 
3335   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3336 
3337   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3338   M = header[1]; N = header[2];
3339   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3340   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3341   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3342 
3343   /* If global sizes are set, check if they are consistent with that given in the file */
3344   if (sizesset) {
3345     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3346   }
3347   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3348   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3349 
3350   /* determine ownership of all rows */
3351   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3352   else m = newMat->rmap->n; /* Set by user */
3353 
3354   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3355   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3356 
3357   /* First process needs enough room for process with most rows */
3358   if (!rank) {
3359     mmax       = rowners[1];
3360     for (i=2; i<=size; i++) {
3361       mmax = PetscMax(mmax,rowners[i]);
3362     }
3363   } else mmax = m;
3364 
3365   rowners[0] = 0;
3366   for (i=2; i<=size; i++) {
3367     rowners[i] += rowners[i-1];
3368   }
3369   rstart = rowners[rank];
3370   rend   = rowners[rank+1];
3371 
3372   /* distribute row lengths to all processors */
3373   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3374   if (!rank) {
3375     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3376     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3377     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3378     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3379     for (j=0; j<m; j++) {
3380       procsnz[0] += ourlens[j];
3381     }
3382     for (i=1; i<size; i++) {
3383       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3384       /* calculate the number of nonzeros on each processor */
3385       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3386         procsnz[i] += rowlengths[j];
3387       }
3388       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3389     }
3390     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3391   } else {
3392     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3393   }
3394 
3395   if (!rank) {
3396     /* determine max buffer needed and allocate it */
3397     maxnz = 0;
3398     for (i=0; i<size; i++) {
3399       maxnz = PetscMax(maxnz,procsnz[i]);
3400     }
3401     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3402 
3403     /* read in my part of the matrix column indices  */
3404     nz   = procsnz[0];
3405     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3406     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3407 
3408     /* read in everyone else's part and ship it off */
3409     for (i=1; i<size; i++) {
3410       nz     = procsnz[i];
3411       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3412       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3413     }
3414     ierr = PetscFree(cols);CHKERRQ(ierr);
3415   } else {
3416     /* determine buffer space needed for message */
3417     nz = 0;
3418     for (i=0; i<m; i++) {
3419       nz += ourlens[i];
3420     }
3421     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3422 
3423     /* receive message of column indices*/
3424     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3425   }
3426 
3427   /* determine column ownership if matrix is not square */
3428   if (N != M) {
3429     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3430     else n = newMat->cmap->n;
3431     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3432     cstart = cend - n;
3433   } else {
3434     cstart = rstart;
3435     cend   = rend;
3436     n      = cend - cstart;
3437   }
3438 
3439   /* loop over local rows, determining number of off diagonal entries */
3440   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3441   jj = 0;
3442   for (i=0; i<m; i++) {
3443     for (j=0; j<ourlens[i]; j++) {
3444       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3445       jj++;
3446     }
3447   }
3448 
3449   for (i=0; i<m; i++) {
3450     ourlens[i] -= offlens[i];
3451   }
3452   if (!sizesset) {
3453     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3454   }
3455   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3456 
3457   for (i=0; i<m; i++) {
3458     ourlens[i] += offlens[i];
3459   }
3460 
3461   if (!rank) {
3462     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3463 
3464     /* read in my part of the matrix numerical values  */
3465     nz   = procsnz[0];
3466     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3467 
3468     /* insert into matrix */
3469     jj      = rstart;
3470     smycols = mycols;
3471     svals   = vals;
3472     for (i=0; i<m; i++) {
3473       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3474       smycols += ourlens[i];
3475       svals   += ourlens[i];
3476       jj++;
3477     }
3478 
3479     /* read in other processors and ship out */
3480     for (i=1; i<size; i++) {
3481       nz     = procsnz[i];
3482       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3483       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3484     }
3485     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3486   } else {
3487     /* receive numeric values */
3488     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3489 
3490     /* receive message of values*/
3491     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3492 
3493     /* insert into matrix */
3494     jj      = rstart;
3495     smycols = mycols;
3496     svals   = vals;
3497     for (i=0; i<m; i++) {
3498       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3499       smycols += ourlens[i];
3500       svals   += ourlens[i];
3501       jj++;
3502     }
3503   }
3504   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3505   ierr = PetscFree(vals);CHKERRQ(ierr);
3506   ierr = PetscFree(mycols);CHKERRQ(ierr);
3507   ierr = PetscFree(rowners);CHKERRQ(ierr);
3508   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3509   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3510   PetscFunctionReturn(0);
3511 }
3512 
3513 #undef __FUNCT__
3514 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3515 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3516 {
3517   PetscErrorCode ierr;
3518   IS             iscol_local;
3519   PetscInt       csize;
3520 
3521   PetscFunctionBegin;
3522   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3523   if (call == MAT_REUSE_MATRIX) {
3524     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3525     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3526   } else {
3527     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3528   }
3529   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3530   if (call == MAT_INITIAL_MATRIX) {
3531     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3532     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3533   }
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3539 /*
3540     Not great since it makes two copies of the submatrix: first a SeqAIJ
3541   matrix locally, and then the end result obtained by concatenating the local matrices.
3542   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3543 
3544   Note: This requires a sequential iscol with all indices.
3545 */
3546 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3547 {
3548   PetscErrorCode ierr;
3549   PetscMPIInt    rank,size;
3550   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3551   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3552   Mat            *local,M,Mreuse;
3553   MatScalar      *vwork,*aa;
3554   MPI_Comm       comm = ((PetscObject)mat)->comm;
3555   Mat_SeqAIJ     *aij;
3556 
3557 
3558   PetscFunctionBegin;
3559   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3560   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3561 
3562   if (call ==  MAT_REUSE_MATRIX) {
3563     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3564     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3565     local = &Mreuse;
3566     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3567   } else {
3568     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3569     Mreuse = *local;
3570     ierr   = PetscFree(local);CHKERRQ(ierr);
3571   }
3572 
3573   /*
3574       m - number of local rows
3575       n - number of columns (same on all processors)
3576       rstart - first row in new global matrix generated
3577   */
3578   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3579   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3580   if (call == MAT_INITIAL_MATRIX) {
3581     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3582     ii  = aij->i;
3583     jj  = aij->j;
3584 
3585     /*
3586         Determine the number of non-zeros in the diagonal and off-diagonal
3587         portions of the matrix in order to do correct preallocation
3588     */
3589 
3590     /* first get start and end of "diagonal" columns */
3591     if (csize == PETSC_DECIDE) {
3592       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3593       if (mglobal == n) { /* square matrix */
3594         nlocal = m;
3595       } else {
3596         nlocal = n/size + ((n % size) > rank);
3597       }
3598     } else {
3599       nlocal = csize;
3600     }
3601     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3602     rstart = rend - nlocal;
3603     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3604 
3605     /* next, compute all the lengths */
3606     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3607     olens = dlens + m;
3608     for (i=0; i<m; i++) {
3609       jend = ii[i+1] - ii[i];
3610       olen = 0;
3611       dlen = 0;
3612       for (j=0; j<jend; j++) {
3613         if (*jj < rstart || *jj >= rend) olen++;
3614         else dlen++;
3615         jj++;
3616       }
3617       olens[i] = olen;
3618       dlens[i] = dlen;
3619     }
3620     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3621     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3622     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3623     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3624     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3625     ierr = PetscFree(dlens);CHKERRQ(ierr);
3626   } else {
3627     PetscInt ml,nl;
3628 
3629     M = *newmat;
3630     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3631     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3632     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3633     /*
3634          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3635        rather than the slower MatSetValues().
3636     */
3637     M->was_assembled = PETSC_TRUE;
3638     M->assembled     = PETSC_FALSE;
3639   }
3640   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3641   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3642   ii  = aij->i;
3643   jj  = aij->j;
3644   aa  = aij->a;
3645   for (i=0; i<m; i++) {
3646     row   = rstart + i;
3647     nz    = ii[i+1] - ii[i];
3648     cwork = jj;     jj += nz;
3649     vwork = aa;     aa += nz;
3650     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3651   }
3652 
3653   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   *newmat = M;
3656 
3657   /* save submatrix used in processor for next request */
3658   if (call ==  MAT_INITIAL_MATRIX) {
3659     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3660     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3661   }
3662 
3663   PetscFunctionReturn(0);
3664 }
3665 
3666 EXTERN_C_BEGIN
3667 #undef __FUNCT__
3668 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3669 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3670 {
3671   PetscInt       m,cstart, cend,j,nnz,i,d;
3672   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3673   const PetscInt *JJ;
3674   PetscScalar    *values;
3675   PetscErrorCode ierr;
3676 
3677   PetscFunctionBegin;
3678   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3679 
3680   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3681   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3682   m      = B->rmap->n;
3683   cstart = B->cmap->rstart;
3684   cend   = B->cmap->rend;
3685   rstart = B->rmap->rstart;
3686 
3687   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3688 
3689 #if defined(PETSC_USE_DEBUG)
3690   for (i=0; i<m; i++) {
3691     nnz     = Ii[i+1]- Ii[i];
3692     JJ      = J + Ii[i];
3693     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3694     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3695     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3696   }
3697 #endif
3698 
3699   for (i=0; i<m; i++) {
3700     nnz     = Ii[i+1]- Ii[i];
3701     JJ      = J + Ii[i];
3702     nnz_max = PetscMax(nnz_max,nnz);
3703     d       = 0;
3704     for (j=0; j<nnz; j++) {
3705       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3706     }
3707     d_nnz[i] = d;
3708     o_nnz[i] = nnz - d;
3709   }
3710   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3711   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3712 
3713   if (v) values = (PetscScalar*)v;
3714   else {
3715     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3716     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3717   }
3718 
3719   for (i=0; i<m; i++) {
3720     ii   = i + rstart;
3721     nnz  = Ii[i+1]- Ii[i];
3722     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3723   }
3724   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3725   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3726 
3727   if (!v) {
3728     ierr = PetscFree(values);CHKERRQ(ierr);
3729   }
3730   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3731   PetscFunctionReturn(0);
3732 }
3733 EXTERN_C_END
3734 
3735 #undef __FUNCT__
3736 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3737 /*@
3738    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3739    (the default parallel PETSc format).
3740 
3741    Collective on MPI_Comm
3742 
3743    Input Parameters:
3744 +  B - the matrix
3745 .  i - the indices into j for the start of each local row (starts with zero)
3746 .  j - the column indices for each local row (starts with zero)
3747 -  v - optional values in the matrix
3748 
3749    Level: developer
3750 
3751    Notes:
3752        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3753      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3754      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3755 
3756        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3757 
3758        The format used for the sparse matrix input is equivalent to a
3759     row-major ordering, i.e. for the following matrix, the input data expected is
3760     as shown:
3761 
3762         1 0 0
3763         2 0 3     P0
3764        -------
3765         4 5 6     P1
3766 
3767      Process0 [P0]: rows_owned=[0,1]
3768         i =  {0,1,3}  [size = nrow+1  = 2+1]
3769         j =  {0,0,2}  [size = nz = 3]
3770         v =  {1,2,3}  [size = nz = 3]
3771 
3772      Process1 [P1]: rows_owned=[2]
3773         i =  {0,3}    [size = nrow+1  = 1+1]
3774         j =  {0,1,2}  [size = nz = 3]
3775         v =  {4,5,6}  [size = nz = 3]
3776 
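     A minimal usage sketch for P0 above (the communicator and local sizes are
     assumptions matching this example, not requirements of the routine; P1 makes
     the matching collective call with its own arrays):

.vb
        Mat         A;
        PetscInt    i[] = {0,1,3};        /* row offsets of the 2 local rows */
        PetscInt    j[] = {0,0,2};        /* global column indices           */
        PetscScalar v[] = {1.0,2.0,3.0};

        MatCreate(PETSC_COMM_WORLD,&A);
        MatSetSizes(A,2,PETSC_DECIDE,3,3);
        MatSetType(A,MATMPIAIJ);
        MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve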
3777 .keywords: matrix, aij, compressed row, sparse, parallel
3778 
3779 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3780           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3781 @*/
3782 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3783 {
3784   PetscErrorCode ierr;
3785 
3786   PetscFunctionBegin;
3787   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3788   PetscFunctionReturn(0);
3789 }
3790 
3791 #undef __FUNCT__
3792 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3793 /*@C
3794    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3795    (the default parallel PETSc format).  For good matrix assembly performance
3796    the user should preallocate the matrix storage by setting the parameters
3797    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3798    performance can be increased by more than a factor of 50.
3799 
3800    Collective on MPI_Comm
3801 
3802    Input Parameters:
3803 +  A - the matrix
3804 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3805            (same value is used for all local rows)
3806 .  d_nnz - array containing the number of nonzeros in the various rows of the
3807            DIAGONAL portion of the local submatrix (possibly different for each row)
3808            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3809            The size of this array is equal to the number of local rows, i.e 'm'.
3810            For matrices that will be factored, you must leave room for (and set)
3811            the diagonal entry even if it is zero.
3812 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3813            submatrix (same value is used for all local rows).
3814 -  o_nnz - array containing the number of nonzeros in the various rows of the
3815            OFF-DIAGONAL portion of the local submatrix (possibly different for
3816            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3817            structure. The size of this array is equal to the number
3818            of local rows, i.e 'm'.
3819 
3820    If the *_nnz parameter is given then the *_nz parameter is ignored
3821 
3822    The AIJ format (also called the Yale sparse matrix format or
3823    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3824    storage.  The stored row and column indices begin with zero.
3825    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3826 
3827    The parallel matrix is partitioned such that the first m0 rows belong to
3828    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3829    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3830 
3831    The DIAGONAL portion of the local submatrix of a processor can be defined
3832    as the submatrix which is obtained by extracting the part corresponding to
3833    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3834    first row that belongs to the processor, r2 is the last row belonging to
3835    this processor, and c1-c2 is the range of indices of the local part of a
3836    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3837    common case of a square matrix, the row and column ranges are the same and
3838    the DIAGONAL part is also square. The remaining portion of the local
3839    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3840 
3841    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3842 
3843    You can call MatGetInfo() to get information on how effective the preallocation was;
3844    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3845    You can also run with the option -info and look for messages with the string
3846    malloc in them to see if additional memory allocation was needed.
3847 
3848    Example usage:
3849 
3850    Consider the following 8x8 matrix with 34 non-zero values, that is
3851    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3852    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3853    as follows:
3854 
3855 .vb
3856             1  2  0  |  0  3  0  |  0  4
3857     Proc0   0  5  6  |  7  0  0  |  8  0
3858             9  0 10  | 11  0  0  | 12  0
3859     -------------------------------------
3860            13  0 14  | 15 16 17  |  0  0
3861     Proc1   0 18  0  | 19 20 21  |  0  0
3862             0  0  0  | 22 23  0  | 24  0
3863     -------------------------------------
3864     Proc2  25 26 27  |  0  0 28  | 29  0
3865            30  0  0  | 31 32 33  |  0 34
3866 .ve
3867 
3868    This can be represented as a collection of submatrices as:
3869 
3870 .vb
3871       A B C
3872       D E F
3873       G H I
3874 .ve
3875 
3876    Where the submatrices A,B,C are owned by proc0, D,E,F are
3877    owned by proc1, G,H,I are owned by proc2.
3878 
3879    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3880    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3881    The 'M','N' parameters are 8,8, and have the same values on all procs.
3882 
3883    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3884    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3885    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3886    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3887    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3888    matrix, and [DF] as another SeqAIJ matrix.
3889 
3890    When d_nz, o_nz parameters are specified, d_nz storage elements are
3891    allocated for every row of the local diagonal submatrix, and o_nz
3892    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3893    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3894    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3895    In this case, the values of d_nz,o_nz are:
3896 .vb
3897      proc0 : d_nz = 2, o_nz = 2
3898      proc1 : d_nz = 3, o_nz = 2
3899      proc2 : d_nz = 1, o_nz = 4
3900 .ve
3901    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3902    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3903    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3904    34 values.
3905 
3906    When d_nnz, o_nnz parameters are specified, the storage is specified
3907    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3908    In the above case the values for d_nnz,o_nnz are:
3909 .vb
3910      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3911      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3912      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3913 .ve
3914    Here the space allocated is the sum of all the above values, i.e. 34, and
3915    hence the preallocation is perfect.
3916 
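   As a sketch, the collective call proc0 would make in the d_nnz/o_nnz case above
   (error checking omitted; the sizes are assumptions taken from the 8x8 example,
   and each process passes its own d_nnz/o_nnz arrays):

.vb
        Mat      A;
        PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0's rows */

        MatCreate(PETSC_COMM_WORLD,&A);
        MatSetSizes(A,3,3,8,8);
        MatSetType(A,MATMPIAIJ);
        MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve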
3917    Level: intermediate
3918 
3919 .keywords: matrix, aij, compressed row, sparse, parallel
3920 
3921 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3922           MPIAIJ, MatGetInfo()
3923 @*/
3924 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3925 {
3926   PetscErrorCode ierr;
3927 
3928   PetscFunctionBegin;
3929   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3930   PetscValidType(B,1);
3931   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3932   PetscFunctionReturn(0);
3933 }
3934 
3935 #undef __FUNCT__
3936 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3937 /*@
3938      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3939          the local rows in standard CSR format.
3940 
3941    Collective on MPI_Comm
3942 
3943    Input Parameters:
3944 +  comm - MPI communicator
3945 .  m - number of local rows (Cannot be PETSC_DECIDE)
3946 .  n - This value should be the same as the local size used in creating the
3947        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3948        calculated if N is given) For square matrices n is almost always m.
3949 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3950 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3951 .   i - row indices
3952 .   j - column indices
3953 -   a - matrix values
3954 
3955    Output Parameter:
3956 .   mat - the matrix
3957 
3958    Level: intermediate
3959 
3960    Notes:
3961        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3962      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3963      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3964 
3965        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3966 
3967        The format used for the sparse matrix input is equivalent to a
3968     row-major ordering, i.e. for the following matrix, the input data expected is
3969     as shown:
3970 
3971         1 0 0
3972         2 0 3     P0
3973        -------
3974         4 5 6     P1
3975 
3976      Process0 [P0]: rows_owned=[0,1]
3977         i =  {0,1,3}  [size = nrow+1  = 2+1]
3978         j =  {0,0,2}  [size = nz = 3]
3979         v =  {1,2,3}  [size = nz = 3]
3980 
3981      Process1 [P1]: rows_owned=[2]
3982         i =  {0,3}    [size = nrow+1  = 1+1]
3983         j =  {0,1,2}  [size = nz = 3]
3984         v =  {4,5,6}  [size = nz = 3]
3985 
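     A minimal usage sketch for P0 above (the communicator and sizes are assumptions
     matching this example; P1 makes the matching collective call with its own arrays):

.vb
        Mat         A;
        PetscInt    i[] = {0,1,3};
        PetscInt    j[] = {0,0,2};
        PetscScalar v[] = {1.0,2.0,3.0};

        MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve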
3986 .keywords: matrix, aij, compressed row, sparse, parallel
3987 
3988 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3989           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3990 @*/
3991 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3992 {
3993   PetscErrorCode ierr;
3994 
3995   PetscFunctionBegin;
3996   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3997   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3998   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3999   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4000   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4001   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4002   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4003   PetscFunctionReturn(0);
4004 }
4005 
4006 #undef __FUNCT__
4007 #define __FUNCT__ "MatCreateAIJ"
4008 /*@C
4009    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4010    (the default parallel PETSc format).  For good matrix assembly performance
4011    the user should preallocate the matrix storage by setting the parameters
4012    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4013    performance can be increased by more than a factor of 50.
4014 
4015    Collective on MPI_Comm
4016 
4017    Input Parameters:
4018 +  comm - MPI communicator
4019 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4020            This value should be the same as the local size used in creating the
4021            y vector for the matrix-vector product y = Ax.
4022 .  n - This value should be the same as the local size used in creating the
4023        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4024        calculated if N is given) For square matrices n is almost always m.
4025 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4026 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4027 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4028            (same value is used for all local rows)
4029 .  d_nnz - array containing the number of nonzeros in the various rows of the
4030            DIAGONAL portion of the local submatrix (possibly different for each row)
4031            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4032            The size of this array is equal to the number of local rows, i.e. 'm'.
4033 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4034            submatrix (same value is used for all local rows).
4035 -  o_nnz - array containing the number of nonzeros in the various rows of the
4036            OFF-DIAGONAL portion of the local submatrix (possibly different for
4037            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4038            structure. The size of this array is equal to the number
4039            of local rows, i.e. 'm'.
4040 
4041    Output Parameter:
4042 .  A - the matrix
4043 
4044    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4045    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4046    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4047 
4048    Notes:
4049    If the *_nnz parameter is given then the *_nz parameter is ignored
4050 
4051    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4052    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4053    storage requirements for this matrix.
4054 
4055    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4056    processor then it must be used on all processors that share the object for
4057    that argument.
4058 
4059    The user MUST specify either the local or global matrix dimensions
4060    (possibly both).
4061 
4062    The parallel matrix is partitioned across processors such that the
4063    first m0 rows belong to process 0, the next m1 rows belong to
4064    process 1, the next m2 rows belong to process 2, etc., where
4065    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4066    values corresponding to an [m x N] submatrix.
4067 
4068    The columns are logically partitioned with the first n0 columns belonging
4069    to the 0th partition, the next n1 columns belonging to the next
4070    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4071 
4072    The DIAGONAL portion of the local submatrix on any given processor
4073    is the submatrix corresponding to the m rows and n columns owned by
4074    the given processor, i.e. the diagonal submatrix on
4075    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4076    etc. The remaining [m x (N-n)] portion of the local submatrix
4077    constitutes the OFF-DIAGONAL portion. The example below better
4078    illustrates this concept.
4079 
4080    For a square global matrix we define each processor's diagonal portion
4081    to be its local rows and the corresponding columns (a square submatrix);
4082    each processor's off-diagonal portion encompasses the remainder of the
4083    local matrix (a rectangular submatrix).
4084 
4085    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4086 
4087    When calling this routine with a single process communicator, a matrix of
4088    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4089    type of communicator, use the construction mechanism:
4090      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4091 
4092    By default, this format uses inodes (identical nodes) when possible.
4093    We search for consecutive rows with the same nonzero structure, thereby
4094    reusing matrix information to achieve increased efficiency.
4095 
4096    Options Database Keys:
4097 +  -mat_no_inode  - Do not use inodes
4098 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4099 -  -mat_aij_oneindex - Internally use indexing starting at 1
4100         rather than 0.  Note that when calling MatSetValues(),
4101         the user still MUST index entries starting at 0!
4102 
4103 
4104    Example usage:
4105 
4106    Consider the following 8x8 matrix with 34 non-zero values that is
4107    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4108    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4109    as follows:
4110 
4111 .vb
4112             1  2  0  |  0  3  0  |  0  4
4113     Proc0   0  5  6  |  7  0  0  |  8  0
4114             9  0 10  | 11  0  0  | 12  0
4115     -------------------------------------
4116            13  0 14  | 15 16 17  |  0  0
4117     Proc1   0 18  0  | 19 20 21  |  0  0
4118             0  0  0  | 22 23  0  | 24  0
4119     -------------------------------------
4120     Proc2  25 26 27  |  0  0 28  | 29  0
4121            30  0  0  | 31 32 33  |  0 34
4122 .ve
4123 
4124    This can be represented as a collection of submatrices as:
4125 
4126 .vb
4127       A B C
4128       D E F
4129       G H I
4130 .ve
4131 
4132    Where the submatrices A,B,C are owned by proc0, D,E,F are
4133    owned by proc1, G,H,I are owned by proc2.
4134 
4135    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4136    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4137    The 'M','N' parameters are 8,8, and have the same values on all procs.
4138 
4139    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4140    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4141    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4142    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4143    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4144    matrix, and [DF] as another SeqAIJ matrix.
4145 
4146    When d_nz, o_nz parameters are specified, d_nz storage elements are
4147    allocated for every row of the local diagonal submatrix, and o_nz
4148    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4149    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4150    local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4151    In this case, the values of d_nz,o_nz are:
4152 .vb
4153      proc0 : d_nz = 2, o_nz = 2
4154      proc1 : d_nz = 3, o_nz = 2
4155      proc2 : d_nz = 1, o_nz = 4
4156 .ve
4157    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4158    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4159    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4160    34 values.
4161 
4162    When d_nnz, o_nnz parameters are specified, the storage is specified
4163    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4164    In the above case the values for d_nnz,o_nnz are:
4165 .vb
4166      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4167      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4168      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4169 .ve
4170    Here the space allocated is the sum of all the above values, i.e. 34, and
4171    hence the pre-allocation is perfect.
4172 
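   For illustration, a minimal sketch of how proc1 in the example above could
   create the matrix with the per-row preallocation given (declaration of ierr
   and the MatSetValues() calls that fill the rows are assumed):
.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};  /* proc1 values from the table above */
     Mat      A;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues() for the locally owned rows ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
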
4173    Level: intermediate
4174 
4175 .keywords: matrix, aij, compressed row, sparse, parallel
4176 
4177 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4178           MPIAIJ, MatCreateMPIAIJWithArrays()
4179 @*/
4180 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4181 {
4182   PetscErrorCode ierr;
4183   PetscMPIInt    size;
4184 
4185   PetscFunctionBegin;
4186   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4187   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4188   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4189   if (size > 1) {
4190     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4191     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4192   } else {
4193     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4194     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4195   }
4196   PetscFunctionReturn(0);
4197 }
4198 
4199 #undef __FUNCT__
4200 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
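/*
   Returns the diagonal part (Ad) and off-diagonal part (Ao) of the MPIAIJ matrix A
   as SeqAIJ matrices, plus the mapping (colmap) from the local column numbering of
   Ao to the global column numbering of A.
*/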
4201 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4202 {
4203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4204 
4205   PetscFunctionBegin;
4206   *Ad     = a->A;
4207   *Ao     = a->B;
4208   *colmap = a->garray;
4209   PetscFunctionReturn(0);
4210 }
4211 
4212 #undef __FUNCT__
4213 #define __FUNCT__ "MatSetColoring_MPIAIJ"
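/*
   Applies an ISColoring to an MPIAIJ matrix by building sequential colorings for
   the diagonal and off-diagonal blocks, translating the given global (or ghosted)
   color indices into each block's local column numbering where needed.
*/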
4214 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4215 {
4216   PetscErrorCode ierr;
4217   PetscInt       i;
4218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4219 
4220   PetscFunctionBegin;
4221   if (coloring->ctype == IS_COLORING_GLOBAL) {
4222     ISColoringValue *allcolors,*colors;
4223     ISColoring      ocoloring;
4224 
4225     /* set coloring for diagonal portion */
4226     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4227 
4228     /* set coloring for off-diagonal portion */
4229     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4230     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4231     for (i=0; i<a->B->cmap->n; i++) {
4232       colors[i] = allcolors[a->garray[i]];
4233     }
4234     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4235     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4236     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4237     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4238   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4239     ISColoringValue *colors;
4240     PetscInt        *larray;
4241     ISColoring      ocoloring;
4242 
4243     /* set coloring for diagonal portion */
4244     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4245     for (i=0; i<a->A->cmap->n; i++) {
4246       larray[i] = i + A->cmap->rstart;
4247     }
4248     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4249     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4250     for (i=0; i<a->A->cmap->n; i++) {
4251       colors[i] = coloring->colors[larray[i]];
4252     }
4253     ierr = PetscFree(larray);CHKERRQ(ierr);
4254     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4255     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4256     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4257 
4258     /* set coloring for off-diagonal portion */
4259     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4260     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4261     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4262     for (i=0; i<a->B->cmap->n; i++) {
4263       colors[i] = coloring->colors[larray[i]];
4264     }
4265     ierr = PetscFree(larray);CHKERRQ(ierr);
4266     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4267     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4268     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4269   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4270 
4271   PetscFunctionReturn(0);
4272 }
4273 
4274 #if defined(PETSC_HAVE_ADIC)
4275 #undef __FUNCT__
4276 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4277 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4278 {
4279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4280   PetscErrorCode ierr;
4281 
4282   PetscFunctionBegin;
4283   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4284   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4285   PetscFunctionReturn(0);
4286 }
4287 #endif
4288 
4289 #undef __FUNCT__
4290 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4291 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4292 {
4293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4294   PetscErrorCode ierr;
4295 
4296   PetscFunctionBegin;
4297   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4298   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4299   PetscFunctionReturn(0);
4300 }
4301 
4302 #undef __FUNCT__
4303 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4304 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4305 {
4306   PetscErrorCode ierr;
4307   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4308   PetscInt       *indx;
4309 
4310   PetscFunctionBegin;
4311   /* This routine will ONLY return MPIAIJ type matrix */
4312   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4313   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4314   if (n == PETSC_DECIDE){
4315     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4316   }
4317   /* Check sum(n) = N */
4318   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4319   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4320 
4321   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4322   rstart -= m;
4323 
4324   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4325   for (i=0;i<m;i++) {
4326     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4327     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4328     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4329   }
4330 
4331   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4332   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4333   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4334   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4335   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4336   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4337   PetscFunctionReturn(0);
4338 }
4339 
4340 #undef __FUNCT__
4341 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4342 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4343 {
4344   PetscErrorCode ierr;
4345   PetscInt       m,N,i,rstart,nnz,Ii;
4346   PetscInt       *indx;
4347   PetscScalar    *values;
4348 
4349   PetscFunctionBegin;
4350   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4351   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4352   for (i=0;i<m;i++) {
4353     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4354     Ii    = i + rstart;
4355     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4356     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4357   }
4358   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4359   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4360   PetscFunctionReturn(0);
4361 }
4362 
4363 #undef __FUNCT__
4364 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4365 /*@
4366       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4367                  matrices from each processor
4368 
4369     Collective on MPI_Comm
4370 
4371    Input Parameters:
4372 +    comm - the communicator the parallel matrix will live on
4373 .    inmat - the input sequential matrix (one per process)
4374 .    n - number of local columns (or PETSC_DECIDE)
4375 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4376 
4377    Output Parameter:
4378 .    outmat - the parallel matrix generated
4379 
4380     Level: advanced
4381 
4382    Notes: The number of columns of the matrix in EACH processor MUST be the same.
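
     For illustration, a minimal sketch (the local rows of each process's sequential
     matrix are stacked, in rank order, into one parallel matrix):
.vb
     Mat seqA,mpiA;
     /* ... each process fills its own SeqAIJ matrix seqA ... */
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
.ve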
4383 
4384 @*/
4385 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4386 {
4387   PetscErrorCode ierr;
4388 
4389   PetscFunctionBegin;
4390   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4391   if (scall == MAT_INITIAL_MATRIX){
4392     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4393   }
4394   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4395   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4396   PetscFunctionReturn(0);
4397 }
4398 
4399 #undef __FUNCT__
4400 #define __FUNCT__ "MatFileSplit"
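/*
   Writes each process's local rows of A, as a sequential m-by-N matrix, to its own
   binary file named <outfile>.<rank>.
*/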
4401 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4402 {
4403   PetscErrorCode    ierr;
4404   PetscMPIInt       rank;
4405   PetscInt          m,N,i,rstart,nnz;
4406   size_t            len;
4407   const PetscInt    *indx;
4408   PetscViewer       out;
4409   char              *name;
4410   Mat               B;
4411   const PetscScalar *values;
4412 
4413   PetscFunctionBegin;
4414   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4415   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4416   /* Should this be the type of the diagonal block of A? */
4417   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4418   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4419   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4420   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4421   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4422   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4423   for (i=0;i<m;i++) {
4424     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4425     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4426     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4427   }
4428   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4429   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4430 
4431   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4432   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4433   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4434   sprintf(name,"%s.%d",outfile,rank);
4435   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4436   ierr = PetscFree(name);CHKERRQ(ierr);
4437   ierr = MatView(B,out);CHKERRQ(ierr);
4438   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4439   ierr = MatDestroy(&B);CHKERRQ(ierr);
4440   PetscFunctionReturn(0);
4441 }
4442 
4443 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4444 #undef __FUNCT__
4445 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4446 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4447 {
4448   PetscErrorCode       ierr;
4449   Mat_Merge_SeqsToMPI  *merge;
4450   PetscContainer       container;
4451 
4452   PetscFunctionBegin;
4453   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4454   if (container) {
4455     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4456     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4457     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4458     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4459     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4460     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4461     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4462     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4468     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4469     ierr = PetscFree(merge);CHKERRQ(ierr);
4470     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4471   }
4472   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4473   PetscFunctionReturn(0);
4474 }
4475 
4476 #include <../src/mat/utils/freespace.h>
4477 #include <petscbt.h>
4478 
4479 #undef __FUNCT__
4480 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
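/*
   Numeric phase of MatCreateMPIAIJSumSeqAIJ(): communicates the local values of each
   process's seqmat and sums them into the parallel matrix whose nonzero structure
   was built by MatCreateMPIAIJSumSeqAIJSymbolic().
*/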
4481 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4482 {
4483   PetscErrorCode       ierr;
4484   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4485   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4486   PetscMPIInt          size,rank,taga,*len_s;
4487   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4488   PetscInt             proc,m;
4489   PetscInt             **buf_ri,**buf_rj;
4490   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4491   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4492   MPI_Request          *s_waits,*r_waits;
4493   MPI_Status           *status;
4494   MatScalar            *aa=a->a;
4495   MatScalar            **abuf_r,*ba_i;
4496   Mat_Merge_SeqsToMPI  *merge;
4497   PetscContainer       container;
4498 
4499   PetscFunctionBegin;
4500   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4501 
4502   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4503   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4504 
4505   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4506   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4507 
4508   bi     = merge->bi;
4509   bj     = merge->bj;
4510   buf_ri = merge->buf_ri;
4511   buf_rj = merge->buf_rj;
4512 
4513   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4514   owners = merge->rowmap->range;
4515   len_s  = merge->len_s;
4516 
4517   /* send and recv matrix values */
4518   /*-----------------------------*/
4519   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4520   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4521 
4522   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4523   for (proc=0,k=0; proc<size; proc++){
4524     if (!len_s[proc]) continue;
4525     i = owners[proc];
4526     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4527     k++;
4528   }
4529 
4530   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4531   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4532   ierr = PetscFree(status);CHKERRQ(ierr);
4533 
4534   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4535   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4536 
4537   /* insert mat values of mpimat */
4538   /*----------------------------*/
4539   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4540   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4541 
4542   for (k=0; k<merge->nrecv; k++){
4543     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4544     nrows = *(buf_ri_k[k]);
4545     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4546     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4547   }
4548 
4549   /* set values of ba */
4550   m = merge->rowmap->n;
4551   for (i=0; i<m; i++) {
4552     arow = owners[rank] + i;
4553     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4554     bnzi = bi[i+1] - bi[i];
4555     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4556 
4557     /* add local non-zero vals of this proc's seqmat into ba */
4558     anzi = ai[arow+1] - ai[arow];
4559     aj   = a->j + ai[arow];
4560     aa   = a->a + ai[arow];
4561     nextaj = 0;
4562     for (j=0; nextaj<anzi; j++){
4563       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4564         ba_i[j] += aa[nextaj++];
4565       }
4566     }
4567 
4568     /* add received vals into ba */
4569     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4570       /* i-th row */
4571       if (i == *nextrow[k]) {
4572         anzi = *(nextai[k]+1) - *nextai[k];
4573         aj   = buf_rj[k] + *(nextai[k]);
4574         aa   = abuf_r[k] + *(nextai[k]);
4575         nextaj = 0;
4576         for (j=0; nextaj<anzi; j++){
4577           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4578             ba_i[j] += aa[nextaj++];
4579           }
4580         }
4581         nextrow[k]++; nextai[k]++;
4582       }
4583     }
4584     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4585   }
4586   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4587   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4588 
4589   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4590   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4591   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4592   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4593   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4598 
4599 #undef __FUNCT__
4600 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
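/*
   Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines row ownership, exchanges
   the ij-structure of the rows each process contributes, preallocates the (still
   unassembled) parallel matrix, and attaches the communication data to it for reuse
   by the numeric phase.
*/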
4601 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4602 {
4603   PetscErrorCode       ierr;
4604   Mat                  B_mpi;
4605   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4606   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4607   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4608   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4609   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4610   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4611   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4612   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4613   MPI_Status           *status;
4614   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4615   PetscBT              lnkbt;
4616   Mat_Merge_SeqsToMPI  *merge;
4617   PetscContainer       container;
4618 
4619   PetscFunctionBegin;
4620   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4621 
4622   /* make sure it is a PETSc comm */
4623   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4624   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4625   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4626 
4627   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4628   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4629 
4630   /* determine row ownership */
4631   /*---------------------------------------------------------*/
4632   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4633   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4634   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4635   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4636   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4637   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4638   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4639 
4640   m      = merge->rowmap->n;
4641   M      = merge->rowmap->N;
4642   owners = merge->rowmap->range;
4643 
4644   /* determine the number of messages to send, their lengths */
4645   /*---------------------------------------------------------*/
4646   len_s  = merge->len_s;
4647 
4648   len = 0;  /* length of buf_si[] */
4649   merge->nsend = 0;
4650   for (proc=0; proc<size; proc++){
4651     len_si[proc] = 0;
4652     if (proc == rank){
4653       len_s[proc] = 0;
4654     } else {
4655       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4656       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4657       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4658     if (len_s[proc]) {
4659       merge->nsend++;
4660       nrows = 0;
4661       for (i=owners[proc]; i<owners[proc+1]; i++){
4662         if (ai[i+1] > ai[i]) nrows++;
4663       }
4664       len_si[proc] = 2*(nrows+1);
4665       len += len_si[proc];
4666     }
4667   }
4668 
4669   /* determine the number and length of messages to receive for ij-structure */
4670   /*-------------------------------------------------------------------------*/
4671   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4672   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4673 
4674   /* post the Irecv of j-structure */
4675   /*-------------------------------*/
4676   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4677   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4678 
4679   /* post the Isend of j-structure */
4680   /*--------------------------------*/
4681   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4682 
4683   for (proc=0, k=0; proc<size; proc++){
4684     if (!len_s[proc]) continue;
4685     i = owners[proc];
4686     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4687     k++;
4688   }
4689 
4690   /* receives and sends of j-structure are complete */
4691   /*------------------------------------------------*/
4692   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4693   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4694 
4695   /* send and recv i-structure */
4696   /*---------------------------*/
4697   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4698   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4699 
4700   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4701   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4702   for (proc=0,k=0; proc<size; proc++){
4703     if (!len_s[proc]) continue;
4704     /* form outgoing message for i-structure:
4705          buf_si[0]:                 nrows to be sent
4706                [1:nrows]:           row index (global)
4707                [nrows+1:2*nrows+1]: i-structure index
4708     */
4709     /*-------------------------------------------*/
4710     nrows = len_si[proc]/2 - 1;
4711     buf_si_i    = buf_si + nrows+1;
4712     buf_si[0]   = nrows;
4713     buf_si_i[0] = 0;
4714     nrows = 0;
4715     for (i=owners[proc]; i<owners[proc+1]; i++){
4716       anzi = ai[i+1] - ai[i];
4717       if (anzi) {
4718         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4719         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4720         nrows++;
4721       }
4722     }
4723     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4724     k++;
4725     buf_si += len_si[proc];
4726   }
4727 
4728   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4729   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4730 
4731   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4732   for (i=0; i<merge->nrecv; i++){
4733     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4734   }
4735 
4736   ierr = PetscFree(len_si);CHKERRQ(ierr);
4737   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4738   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4739   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4740   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4741   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4742   ierr = PetscFree(status);CHKERRQ(ierr);
4743 
4744   /* compute a local seq matrix in each processor */
4745   /*----------------------------------------------*/
4746   /* allocate bi array and free space for accumulating nonzero column info */
4747   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4748   bi[0] = 0;
4749 
4750   /* create and initialize a linked list */
4751   nlnk = N+1;
4752   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4753 
4754   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4755   len  = ai[owners[rank+1]] - ai[owners[rank]];
4757   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4758   current_space = free_space;
4759 
4760   /* determine symbolic info for each local row */
4761   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4762 
4763   for (k=0; k<merge->nrecv; k++){
4764     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4765     nrows = *buf_ri_k[k];
4766     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4767     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4768   }
4769 
4770   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4771   len = 0;
4772   for (i=0;i<m;i++) {
4773     bnzi   = 0;
4774     /* add local non-zero cols of this proc's seqmat into lnk */
4775     arow   = owners[rank] + i;
4776     anzi   = ai[arow+1] - ai[arow];
4777     aj     = a->j + ai[arow];
4778     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4779     bnzi += nlnk;
4780     /* add received col data into lnk */
4781     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4782       if (i == *nextrow[k]) { /* i-th row */
4783         anzi = *(nextai[k]+1) - *nextai[k];
4784         aj   = buf_rj[k] + *nextai[k];
4785         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4786         bnzi += nlnk;
4787         nextrow[k]++; nextai[k]++;
4788       }
4789     }
4790     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4791 
4792     /* if free space is not available, make more free space */
4793     if (current_space->local_remaining<bnzi) {
4794       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4795       nspacedouble++;
4796     }
4797     /* copy data into free space, then initialize lnk */
4798     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4799     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4800 
4801     current_space->array           += bnzi;
4802     current_space->local_used      += bnzi;
4803     current_space->local_remaining -= bnzi;
4804 
4805     bi[i+1] = bi[i] + bnzi;
4806   }
4807 
4808   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4809 
4810   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4811   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4812   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4813 
4814   /* create symbolic parallel matrix B_mpi */
4815   /*---------------------------------------*/
4816   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4817   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4818   if (n==PETSC_DECIDE) {
4819     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4820   } else {
4821     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4822   }
4823   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4824   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4825   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4826   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4827   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4828 
4829   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4830   B_mpi->assembled     = PETSC_FALSE;
4831   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4832   merge->bi            = bi;
4833   merge->bj            = bj;
4834   merge->buf_ri        = buf_ri;
4835   merge->buf_rj        = buf_rj;
4836   merge->coi           = PETSC_NULL;
4837   merge->coj           = PETSC_NULL;
4838   merge->owners_co     = PETSC_NULL;
4839 
4840   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4841 
4842   /* attach the supporting struct to B_mpi for reuse */
4843   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4844   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4845   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4846   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4847   *mpimat = B_mpi;
4848 
4849   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4850   PetscFunctionReturn(0);
4851 }
4852 
4853 #undef __FUNCT__
4854 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4855 /*@C
4856       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4857                  matrices from each processor
4858 
4859     Collective on MPI_Comm
4860 
4861    Input Parameters:
4862 +    comm - the communicator the parallel matrix will live on
4863 .    seqmat - the input sequential matrix (one per process)
4864 .    m - number of local rows (or PETSC_DECIDE)
4865 .    n - number of local columns (or PETSC_DECIDE)
4866 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4867 
4868    Output Parameter:
4869 .    mpimat - the parallel matrix generated
4870 
4871     Level: advanced
4872 
4873    Notes:
4874      The dimensions of the sequential matrix in each processor MUST be the same.
4875      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4876      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
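
     For illustration, a minimal sketch (each process has built its own SeqAIJ matrix
     seqA of identical global dimensions; entries at the same location are summed
     across processes):
.vb
     Mat seqA,mpiA;
     /* ... each process fills its M-by-N SeqAIJ matrix seqA ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
.ve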
4877 @*/
4878 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4879 {
4880   PetscErrorCode   ierr;
4881   PetscMPIInt     size;
4882 
4883   PetscFunctionBegin;
4884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4885   if (size == 1){
4886      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4887      if (scall == MAT_INITIAL_MATRIX){
4888        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4889      } else {
4890        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4891      }
4892      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4893      PetscFunctionReturn(0);
4894   }
4895   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4896   if (scall == MAT_INITIAL_MATRIX){
4897     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4898   }
4899   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4900   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4901   PetscFunctionReturn(0);
4902 }
4903 
4904 #undef __FUNCT__
4905 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4906 /*@
4907      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4908           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4909           with MatGetSize()
4910 
4911     Not Collective
4912 
4913    Input Parameters:
4914 +    A - the matrix
4915 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4916 
4917    Output Parameter:
4918 .    A_loc - the local sequential matrix generated
4919 
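   Notes:
     For illustration, a minimal sketch (the diagonal and off-diagonal parts of the
     local rows are merged into one SeqAIJ matrix; with MAT_REUSE_MATRIX the values
     of a previously generated A_loc are refreshed in place):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... the values of A change ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
.ve
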
4920     Level: developer
4921 
4922 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4923 
4924 @*/
4925 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4926 {
4927   PetscErrorCode  ierr;
4928   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4929   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4930   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4931   MatScalar       *aa=a->a,*ba=b->a,*cam;
4932   PetscScalar     *ca;
4933   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4934   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4935   PetscBool       match;
4936 
4937   PetscFunctionBegin;
4938   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4939   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4940   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4941   if (scall == MAT_INITIAL_MATRIX){
4942     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4943     ci[0] = 0;
4944     for (i=0; i<am; i++){
4945       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4946     }
4947     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4948     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4949     k = 0;
4950     for (i=0; i<am; i++) {
4951       ncols_o = bi[i+1] - bi[i];
4952       ncols_d = ai[i+1] - ai[i];
4953       /* off-diagonal portion of A, global columns < cstart */
4954       for (jo=0; jo<ncols_o; jo++) {
4955         col = cmap[*bj];
4956         if (col >= cstart) break;
4957         cj[k]   = col; bj++;
4958         ca[k++] = *ba++;
4959       }
4960       /* diagonal portion of A */
4961       for (j=0; j<ncols_d; j++) {
4962         cj[k]   = cstart + *aj++;
4963         ca[k++] = *aa++;
4964       }
4965       /* remaining off-diagonal portion of A, global columns past the diagonal block */
4966       for (j=jo; j<ncols_o; j++) {
4967         cj[k]   = cmap[*bj++];
4968         ca[k++] = *ba++;
4969       }
4970     }
4971     /* put together the new matrix */
4972     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4973     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4974     /* Since these are PETSc arrays, change flags to free them as necessary. */
4975     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4976     mat->free_a  = PETSC_TRUE;
4977     mat->free_ij = PETSC_TRUE;
4978     mat->nonew   = 0;
4979   } else if (scall == MAT_REUSE_MATRIX){
4980     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4981     ci = mat->i; cj = mat->j; cam = mat->a;
4982     for (i=0; i<am; i++) {
4983       /* off-diagonal portion of A, global columns < cstart */
4984       ncols_o = bi[i+1] - bi[i];
4985       for (jo=0; jo<ncols_o; jo++) {
4986         col = cmap[*bj];
4987         if (col >= cstart) break;
4988         *cam++ = *ba++; bj++;
4989       }
4990       /* diagonal portion of A */
4991       ncols_d = ai[i+1] - ai[i];
4992       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4993       /* remaining off-diagonal portion of A, global columns past the diagonal block */
4994       for (j=jo; j<ncols_o; j++) {
4995         *cam++ = *ba++; bj++;
4996       }
4997     }
4998   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4999   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5000   PetscFunctionReturn(0);
5001 }
5002 
5003 #undef __FUNCT__
5004 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5005 /*@C
5006      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5007 
5008     Not Collective
5009 
5010    Input Parameters:
5011 +    A - the matrix
5012 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5013 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5014 
5015    Output Parameter:
5016 .    A_loc - the local sequential matrix generated
5017 
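   Notes:
     For illustration, a minimal sketch (passing PETSC_NULL for row and col selects
     all local rows and all columns that contain local nonzeros):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve
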
5018     Level: developer
5019 
5020 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5021 
5022 @*/
5023 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5024 {
5025   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5026   PetscErrorCode    ierr;
5027   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5028   IS                isrowa,iscola;
5029   Mat               *aloc;
5030   PetscBool       match;
5031 
5032   PetscFunctionBegin;
5033   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5034   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5035   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5036   if (!row){
5037     start = A->rmap->rstart; end = A->rmap->rend;
5038     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5039   } else {
5040     isrowa = *row;
5041   }
5042   if (!col){
5043     start = A->cmap->rstart;
5044     cmap  = a->garray;
5045     nzA   = a->A->cmap->n;
5046     nzB   = a->B->cmap->n;
5047     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5048     ncols = 0;
5049     for (i=0; i<nzB; i++) {
5050       if (cmap[i] < start) idx[ncols++] = cmap[i];
5051       else break;
5052     }
5053     imark = i;
5054     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5055     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5056     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5057   } else {
5058     iscola = *col;
5059   }
5060   if (scall != MAT_INITIAL_MATRIX){
5061     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5062     aloc[0] = *A_loc;
5063   }
5064   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5065   *A_loc = aloc[0];
5066   ierr = PetscFree(aloc);CHKERRQ(ierr);
5067   if (!row){
5068     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5069   }
5070   if (!col){
5071     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5072   }
5073   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5074   PetscFunctionReturn(0);
5075 }
5076 
5077 #undef __FUNCT__
5078 #define __FUNCT__ "MatGetBrowsOfAcols"
5079 /*@C
5080     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5081 
5082     Collective on Mat
5083 
5084    Input Parameters:
5085 +    A,B - the matrices in mpiaij format
5086 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5087 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5088 
5089    Output Parameter:
5090 +    rowb, colb - index sets of rows and columns of B to extract
5091 -    B_seq - the sequential matrix generated
5092 
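   Notes:
     For illustration, a minimal sketch (on the first call the row and column index
     sets are created and returned so that they can be reused with MAT_REUSE_MATRIX):
.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat Bseq = PETSC_NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... the values of B change ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
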
5093     Level: developer
5094 
5095 @*/
5096 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5097 {
5098   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5099   PetscErrorCode    ierr;
5100   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5101   IS                isrowb,iscolb;
5102   Mat               *bseq=PETSC_NULL;
5103 
5104   PetscFunctionBegin;
5105   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5106     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5107   }
5108   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5109 
5110   if (scall == MAT_INITIAL_MATRIX){
5111     start = A->cmap->rstart;
5112     cmap  = a->garray;
5113     nzA   = a->A->cmap->n;
5114     nzB   = a->B->cmap->n;
5115     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5116     ncols = 0;
5117     for (i=0; i<nzB; i++) {  /* row < local row index */
5118       if (cmap[i] < start) idx[ncols++] = cmap[i];
5119       else break;
5120     }
5121     imark = i;
5122     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5123     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5124     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5125     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5126   } else {
5127     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5128     isrowb = *rowb; iscolb = *colb;
5129     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5130     bseq[0] = *B_seq;
5131   }
5132   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5133   *B_seq = bseq[0];
5134   ierr = PetscFree(bseq);CHKERRQ(ierr);
5135   if (!rowb){
5136     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5137   } else {
5138     *rowb = isrowb;
5139   }
5140   if (!colb){
5141     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5142   } else {
5143     *colb = iscolb;
5144   }
5145   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5146   PetscFunctionReturn(0);
5147 }
5148 
5149 #undef __FUNCT__
5150 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5151 /*
5152     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5153     of the OFF-DIAGONAL portion of local A
5154 
5155     Collective on Mat
5156 
5157    Input Parameters:
5158 +    A,B - the matrices in mpiaij format
5159 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5160 
5161    Output Parameter:
5162 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5163 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5164 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5165 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5166 
5167     Level: developer
5168 
5169 */
5170 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5171 {
5172   VecScatter_MPI_General *gen_to,*gen_from;
5173   PetscErrorCode         ierr;
5174   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5175   Mat_SeqAIJ             *b_oth;
5176   VecScatter             ctx=a->Mvctx;
5177   MPI_Comm               comm=((PetscObject)ctx)->comm;
5178   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5179   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5180   PetscScalar            *rvalues,*svalues;
5181   MatScalar              *b_otha,*bufa,*bufA;
5182   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5183   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5184   MPI_Status             *sstatus,rstatus;
5185   PetscMPIInt            jj;
5186   PetscInt               *cols,sbs,rbs;
5187   PetscScalar            *vals;
5188 
5189   PetscFunctionBegin;
5190   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5191     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5192   }
5193   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5195 
5196   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5197   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5198   rvalues  = gen_from->values; /* holds the length of receiving row */
5199   svalues  = gen_to->values;   /* holds the length of sending row */
5200   nrecvs   = gen_from->n;
5201   nsends   = gen_to->n;
5202 
5203   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5204   srow     = gen_to->indices;   /* local row index to be sent */
5205   sstarts  = gen_to->starts;
5206   sprocs   = gen_to->procs;
5207   sstatus  = gen_to->sstatus;
5208   sbs      = gen_to->bs;
5209   rstarts  = gen_from->starts;
5210   rprocs   = gen_from->procs;
5211   rbs      = gen_from->bs;
5212 
5213   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5214   if (scall == MAT_INITIAL_MATRIX){
5215     /* i-array */
5216     /*---------*/
5217     /*  post receives */
5218     for (i=0; i<nrecvs; i++){
5219       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5220       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5221       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5222     }
5223 
5224     /* pack the outgoing message */
5225     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5226     sstartsj[0] = 0;  rstartsj[0] = 0;
5227     len = 0; /* total length of j or a array to be sent */
5228     k = 0;
5229     for (i=0; i<nsends; i++){
5230       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5231       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5232       for (j=0; j<nrows; j++) {
5233         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5234         for (l=0; l<sbs; l++){
5235           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5236           rowlen[j*sbs+l] = ncols;
5237           len += ncols;
5238           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5239         }
5240         k++;
5241       }
5242       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5243       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5244     }
5245     /* recvs and sends of i-array are completed */
5246     i = nrecvs;
5247     while (i--) {
5248       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5249     }
5250     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5251 
5252     /* allocate buffers for sending j and a arrays */
5253     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5254     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5255 
5256     /* create i-array of B_oth */
5257     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5258     b_othi[0] = 0;
5259     len = 0; /* total length of j or a array to be received */
5260     k = 0;
5261     for (i=0; i<nrecvs; i++){
5262       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5263       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5264       for (j=0; j<nrows; j++) {
5265         b_othi[k+1] = b_othi[k] + rowlen[j];
5266         len += rowlen[j]; k++;
5267       }
5268       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5269     }
5270 
5271     /* allocate space for j and a arrays of B_oth */
5272     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5273     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5274 
5275     /* j-array */
5276     /*---------*/
5277     /*  post receives of j-array */
5278     for (i=0; i<nrecvs; i++){
5279       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5280       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5281     }
5282 
5283     /* pack the outgoing message j-array */
5284     k = 0;
5285     for (i=0; i<nsends; i++){
5286       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5287       bufJ = bufj+sstartsj[i];
5288       for (j=0; j<nrows; j++) {
5289         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5290         for (ll=0; ll<sbs; ll++){
5291           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5292           for (l=0; l<ncols; l++){
5293             *bufJ++ = cols[l];
5294           }
5295           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5296         }
5297       }
5298       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5299     }
5300 
5301     /* recvs and sends of j-array are completed */
5302     i = nrecvs;
5303     while (i--) {
5304       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5305     }
5306     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5307   } else if (scall == MAT_REUSE_MATRIX){
5308     sstartsj = *startsj_s;
5309     rstartsj = *startsj_r;
5310     bufa     = *bufa_ptr;
5311     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5312     b_otha   = b_oth->a;
5313   } else {
5314     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5315   }
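
  /*
     Editor's note (hypothetical caller; the public interface is assumed, not
     shown in this file, and all names are illustrative): on the first call
     pass MAT_INITIAL_MATRIX to build the index buffers and B_oth; on later
     calls, after only the numerical values of B have changed, pass
     MAT_REUSE_MATRIX together with the buffers returned by the first call:

       PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
       MatScalar *bufa      = PETSC_NULL;
       Mat       B_oth      = PETSC_NULL;
       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
  */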

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr){
      /* caller did not request the buffers, so free them here */
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
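
/*
   Editor's note: the routine above ships the needed rows of B in three phases
   (row lengths, then column indices, then numerical values), each phase using
   the same nonblocking handshake.  A minimal sketch of that handshake, assuming
   the counts, displacements, and neighbor lists are already computed (all
   names below are illustrative, not PETSc API):

     for (i=0; i<nrecvs; i++) {
       ierr = MPI_Irecv(recvbuf+rdispls[i],rcounts[i],MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
     }
     for (i=0; i<nsends; i++) {
       (pack scounts[i] entries into sendbuf+sdispls[i], then)
       ierr = MPI_Isend(sendbuf+sdispls[i],scounts[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
     }
     ierr = MPI_Waitall(nrecvs,rwaits,rstats);CHKERRQ(ierr);
     ierr = MPI_Waitall(nsends,swaits,sstats);CHKERRQ(ierr);

   Posting all receives before packing and posting the sends lets communication
   of one phase overlap the packing of the next without deadlock.
*/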

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
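
/*
   Editor's sketch of a caller (names are illustrative; A must be of type
   MATMPIAIJ, and the returned objects remain owned by the matrix, so the
   caller must not destroy them):

     Vec        lvec;
     VecScatter scat;
     #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
     #else
     PetscInt   *colmap;
     #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
*/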

EXTERN_C_BEGIN
extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')' since no direct MPIDense*MPIAIJ numeric kernel is available

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode     ierr;
  Mat                At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cmat;
  (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
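
/*
   Editor's sketch: reaching the dense*sparse product above through the public
   API (hypothetical matrices; A is MATMPIDENSE, B is MATMPIAIJ, with
   compatible layouts):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   Since the numeric routine forms explicit transposes of both operands, the
   product temporarily uses roughly twice the memory of A and B combined.
*/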

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/
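
/*
   Editor's sketch: selecting this type at runtime (hypothetical driver code;
   run the program with -mat_type mpiaij):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/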

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode  MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled    = PETSC_FALSE;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_TXPETSCGPU)
  /* stuff for GPU capabilities */
  b->diagGPUMatFormat    = MAT_CSR;
  b->offdiagGPUMatFormat = MAT_CSR;
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                     "MatConvert_MPIAIJ_MPIAIJPERM",
                                     MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                     "MatConvert_MPIAIJ_MPIAIJCRL",
                                     MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                     MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
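
/*
   Editor's note: the PetscObjectComposeFunctionDynamic() calls above register
   type-specific implementations under string names; generic code then looks
   them up at runtime instead of linking to them directly.  A minimal sketch of
   the lookup side (hypothetical caller; B is the Mat created above):

     PetscErrorCode (*f)(Mat) = PETSC_NULL;
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatStoreValues_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(B);CHKERRQ(ierr);}
*/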

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0-based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                               PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
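
/*
   Editor's sketch: hypothetical rank-0 input for a 4x4 matrix on two processes
   (2 local rows and 2 local columns per process).  The diagonal block uses
   local column indices 0..n-1, while the off-diagonal block here uses global
   column indices (its SeqAIJ block is created with the full global column
   width above).  The arrays must stay alive until the matrix is destroyed:

     PetscInt    ii[] = {0,1,2};    two local rows, one diagonal nonzero each
     PetscInt    jj[] = {0,1};      local columns (globals 0..1 on rank 0)
     PetscScalar aa[] = {1.0,2.0};
     PetscInt    oi[] = {0,1,1};    one off-diagonal nonzero, in row 0
     PetscInt    oj[] = {2};        global column index
     PetscScalar oa[] = {5.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,ii,jj,aa,oi,oj,oa,&A);CHKERRQ(ierr);
*/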

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
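
/*
   Editor's note: CHKERRQ() normally expands to a "return" statement, which is
   illegal inside the void Fortran-callable wrapper below, so the macros above
   remap the error checks to CHKERRABORT(), which aborts instead of returning.
   Roughly:

     normal expansion:    if (ierr) return PetscError(...);
     remapped expansion:  if (ierr) {PetscError(...);MPI_Abort(comm,ierr);}
*/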

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat             mat = *mmat;
  PetscInt        m = *mm, n = *mn;
  InsertMode      addv = *maddv;
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar     value;
  PetscErrorCode  ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool       roworiented = aij->roworiented;

  /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
  Mat             A = aij->A;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar       *aa = a->a;
  PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat             B = aij->B;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
  PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar       *ba = b->a;

  PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt        nonew = a->nonew;
  MatScalar       *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a; /* refresh ba before it is used to compute ap2 */
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END