xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f272c775a5aef80b210f67d9dc2d40d333c2400a)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to
18    using inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
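/*
   Example usage (an illustrative sketch, not part of this file; assumes Mat A, PetscInt n, and
   PetscErrorCode ierr are declared by the caller, and the per-row nonzero counts of 5 diagonal and
   2 off-diagonal entries are made-up placeholders): create an AIJ matrix and call both
   preallocation routines so the same code works for one or many processes.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/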
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
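/*
   Example usage from the options database (a sketch, assuming Mat A, PetscInt n, and
   PetscErrorCode ierr are declared by the caller):

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   then run with -mat_type aijcrl to select this type at runtime.
*/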
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal nonzero counts */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal nonzero counts */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0 */
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
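/*
   Usage sketch for MatDistribute_MPIAIJ() (illustrative only; gseq is a hypothetical square
   MATSEQAIJ matrix that lives on rank 0, and mlocal is the number of rows this rank is to own):

     ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The second call assumes only the numerical values of gseq have changed on rank 0, since the
   reuse path moves values into the already-set column structure of dist.
*/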
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each processor
330 holds an order N integer array, but lookups are fast).
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
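/*
   Lookup sketch (illustrative; gcol and lcol are hypothetical names): given the global column gcol
   of an off-diagonal entry, the local column index is recovered from colmap exactly as
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() do below; a result of -1 means gcol does not
   appear in the off-diagonal part.

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/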
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
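/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above (a descriptive note, not code): v holds the
   complete row in ascending global column order, so it splits as

     [ off-diagonal entries left of the diagonal block | diagonal-block entries | remaining off-diagonal entries ]

   which matches the three PetscMemcpy() calls copying l, a->i[row+1]-a->i[row], and
   b->i[row+1]-b->i[row]-l values respectively.
*/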
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled; if so, we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if the nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled, thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal, we use separate
837      code so that MatSetValues() is not called for each diagonal entry; doing so would
838      allocate new memory, triggering many mallocs and slowing things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of off process part of matrix zeroing removed columns*/
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually */
1136     /* added into yy until the next line */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* values actually were received in the Begin() but we need to call this nop */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: the diagonal blocks must be transposes of each other */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
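/*
   Illustrative usage sketch (not part of the PETSc sources): dumping an assembled
   MPIAIJ matrix A to a binary file with MatView(), which ends up in
   MatView_MPIAIJ_Binary() above.  The name A and the file name "A.dat" are placeholders.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/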
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1521 
1522     /* copy over the A part */
1523     Aloc = (Mat_SeqAIJ*)aij->A->data;
1524     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1525     row = mat->rmap->rstart;
1526     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1527     for (i=0; i<m; i++) {
1528       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1529       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1530     }
1531     aj = Aloc->j;
1532     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1533 
1534     /* copy over the B part */
1535     Aloc = (Mat_SeqAIJ*)aij->B->data;
1536     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537     row  = mat->rmap->rstart;
1538     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1539     ct   = cols;
1540     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1541     for (i=0; i<m; i++) {
1542       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1543       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1544     }
1545     ierr = PetscFree(ct);CHKERRQ(ierr);
1546     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     /*
1549        Every process has to call MatView() to draw the matrix since the graphics waits are
1550        synchronized across all processes that share the PetscDraw object
1551     */
1552     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1553     if (!rank) {
1554       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1555       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1556       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1557       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1558     }
1559     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1560     ierr = MatDestroy(&A);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatView_MPIAIJ"
1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1568 {
1569   PetscErrorCode ierr;
1570   PetscBool      iascii,isdraw,issocket,isbinary;
1571 
1572   PetscFunctionBegin;
1573   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1577   if (iascii || isdraw || isbinary || issocket) {
1578     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1579   } else {
1580     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
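/*
   Illustrative usage sketch (not part of the PETSc sources): printing summary
   information about an assembled MPIAIJ matrix A to standard output; the
   PETSC_VIEWER_ASCII_INFO format takes the corresponding branch of
   MatView_MPIAIJ_ASCIIorDraworSocket() above.  A is a placeholder.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/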
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSOR_MPIAIJ"
1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1588 {
1589   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1590   PetscErrorCode ierr;
1591   Vec            bb1 = 0;
1592   PetscBool      hasop;
1593 
1594   PetscFunctionBegin;
1595   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1597   }
1598 
1599   if (flag == SOR_APPLY_UPPER) {
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1601     PetscFunctionReturn(0);
1602   }
1603 
1604   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1605     if (flag & SOR_ZERO_INITIAL_GUESS) {
1606       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607       its--;
1608     }
1609 
1610     while (its--) {
1611       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 
1614       /* update rhs: bb1 = bb - B*x */
1615       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1616       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1617 
1618       /* local sweep */
1619       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1620     }
1621   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626     while (its--) {
1627       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630       /* update rhs: bb1 = bb - B*x */
1631       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1632       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1633 
1634       /* local sweep */
1635       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1636     }
1637   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1638     if (flag & SOR_ZERO_INITIAL_GUESS) {
1639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1640       its--;
1641     }
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   }  else if (flag & SOR_EISENSTAT) {
1654     Vec         xx1;
1655 
1656     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1657     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1658 
1659     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (!mat->diag) {
1662       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1663       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1664     }
1665     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1666     if (hasop) {
1667       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1668     } else {
1669       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1670     }
1671     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1672 
1673     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1674 
1675     /* local sweep */
1676     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1677     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1678     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1679   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1680 
1681   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
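/*
   Illustrative usage sketch (not part of the PETSc sources): one processor-local
   symmetric SOR sweep on an assembled MPIAIJ matrix A, approximately solving A x = b.
   The names A, b, x and omega are placeholders.

     PetscReal omega = 1.0;
     ierr = MatSOR(A,b,omega,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/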
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatPermute_MPIAIJ"
1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1688 {
1689   MPI_Comm       comm,pcomm;
1690   PetscInt       first,local_size,nrows;
1691   const PetscInt *rows;
1692   PetscMPIInt    size;
1693   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698   /* make a collective version of 'rowp' */
1699   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1700   if (pcomm==comm) {
1701     crowp = rowp;
1702   } else {
1703     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1704     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1705     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1706     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1707   }
1708   /* collect the global row permutation and invert it */
1709   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1710   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1711   if (pcomm!=comm) {
1712     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1713   }
1714   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1715   /* get the local target indices */
1716   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1717   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1718   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1719   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1720   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1721   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1722   /* the column permutation is so much easier;
1723      make a local version of 'colp' and invert it */
1724   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1725   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1726   if (size==1) {
1727     lcolp = colp;
1728   } else {
1729     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1730     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1731     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1732   }
1733   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1734   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1735   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1736   if (size>1) {
1737     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1738     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1739   }
1740   /* now we just get the submatrix */
1741   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1742   /* clean up */
1743   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1744   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 
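/*
   Illustrative usage sketch (not part of the PETSc sources): permuting the rows and
   columns of an MPIAIJ matrix A; rowperm and colperm are placeholders for index sets
   created elsewhere (e.g. by MatGetOrdering()).

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/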
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1750 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1751 {
1752   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1753   Mat            A = mat->A,B = mat->B;
1754   PetscErrorCode ierr;
1755   PetscReal      isend[5],irecv[5];
1756 
1757   PetscFunctionBegin;
1758   info->block_size     = 1.0;
1759   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1760   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1761   isend[3] = info->memory;  isend[4] = info->mallocs;
1762   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1773     info->nz_used      = irecv[0];
1774     info->nz_allocated = irecv[1];
1775     info->nz_unneeded  = irecv[2];
1776     info->memory       = irecv[3];
1777     info->mallocs      = irecv[4];
1778   } else if (flag == MAT_GLOBAL_SUM) {
1779     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789 
1790   PetscFunctionReturn(0);
1791 }
1792 
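/*
   Illustrative usage sketch (not part of the PETSc sources): querying the nonzero
   counts of an MPIAIJ matrix A summed over all processes; the MatInfo fields are
   PetscLogDoubles, hence the %g format.  A is a placeholder.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/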
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatSetOption_MPIAIJ"
1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1796 {
1797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1798   PetscErrorCode ierr;
1799 
1800   PetscFunctionBegin;
1801   switch (op) {
1802   case MAT_NEW_NONZERO_LOCATIONS:
1803   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1804   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1805   case MAT_KEEP_NONZERO_PATTERN:
1806   case MAT_NEW_NONZERO_LOCATION_ERR:
1807   case MAT_USE_INODES:
1808   case MAT_IGNORE_ZERO_ENTRIES:
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     a->roworiented = flg;
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set                         = PETSC_TRUE;
1825     A->spd                             = flg;
1826     if (flg) {
1827       A->symmetric                     = PETSC_TRUE;
1828       A->structurally_symmetric        = PETSC_TRUE;
1829       A->symmetric_set                 = PETSC_TRUE;
1830       A->structurally_symmetric_set    = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_HERMITIAN:
1840     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1841     break;
1842   case MAT_SYMMETRY_ETERNAL:
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   default:
1846     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1847   }
1848   PetscFunctionReturn(0);
1849 }
1850 
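/*
   Illustrative usage sketch (not part of the PETSc sources): telling an MPIAIJ
   matrix A to drop, rather than stash and communicate, entries destined for rows
   owned by other processes; this sets a->donotstash in the handler above.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/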
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatGetRow_MPIAIJ"
1853 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1854 {
1855   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1856   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1857   PetscErrorCode ierr;
1858   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1859   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1860   PetscInt       *cmap,*idx_p;
1861 
1862   PetscFunctionBegin;
1863   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1864   mat->getrowactive = PETSC_TRUE;
1865 
1866   if (!mat->rowvalues && (idx || v)) {
1867     /*
1868         allocate enough space to hold information from the longest row.
1869     */
1870     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1871     PetscInt   max = 1,tmp;
1872     for (i=0; i<matin->rmap->n; i++) {
1873       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1874       if (max < tmp) { max = tmp; }
1875     }
1876     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1877   }
1878 
1879   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1880   lrow = row - rstart;
1881 
1882   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1883   if (!v)   {pvA = 0; pvB = 0;}
1884   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1885   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1886   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1887   nztot = nzA + nzB;
1888 
1889   cmap  = mat->garray;
1890   if (v  || idx) {
1891     if (nztot) {
1892       /* Sort by increasing column numbers, assuming A and B already sorted */
1893       PetscInt imark = -1;
1894       if (v) {
1895         *v = v_p = mat->rowvalues;
1896         for (i=0; i<nzB; i++) {
1897           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1898           else break;
1899         }
1900         imark = i;
1901         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1902         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1903       }
1904       if (idx) {
1905         *idx = idx_p = mat->rowindices;
1906         if (imark > -1) {
1907           for (i=0; i<imark; i++) {
1908             idx_p[i] = cmap[cworkB[i]];
1909           }
1910         } else {
1911           for (i=0; i<nzB; i++) {
1912             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1913             else break;
1914           }
1915           imark = i;
1916         }
1917         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1918         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1919       }
1920     } else {
1921       if (idx) *idx = 0;
1922       if (v)   *v   = 0;
1923     }
1924   }
1925   *nz = nztot;
1926   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1927   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1933 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1934 {
1935   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1936 
1937   PetscFunctionBegin;
1938   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1939   aij->getrowactive = PETSC_FALSE;
1940   PetscFunctionReturn(0);
1941 }
1942 
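/*
   Illustrative usage sketch (not part of the PETSc sources): visiting the locally
   owned rows of an MPIAIJ matrix A with MatGetRow()/MatRestoreRow(); only local rows
   may be requested, as enforced above.  A is a placeholder.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... inspect cols[0..ncols-1] and vals[0..ncols-1] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/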
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatNorm_MPIAIJ"
1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1946 {
1947   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1948   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1949   PetscErrorCode ierr;
1950   PetscInt       i,j,cstart = mat->cmap->rstart;
1951   PetscReal      sum = 0.0;
1952   MatScalar      *v;
1953 
1954   PetscFunctionBegin;
1955   if (aij->size == 1) {
1956     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1957   } else {
1958     if (type == NORM_FROBENIUS) {
1959       v = amat->a;
1960       for (i=0; i<amat->nz; i++) {
1961 #if defined(PETSC_USE_COMPLEX)
1962         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963 #else
1964         sum += (*v)*(*v); v++;
1965 #endif
1966       }
1967       v = bmat->a;
1968       for (i=0; i<bmat->nz; i++) {
1969 #if defined(PETSC_USE_COMPLEX)
1970         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1971 #else
1972         sum += (*v)*(*v); v++;
1973 #endif
1974       }
1975       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1976       *norm = PetscSqrtReal(*norm);
1977     } else if (type == NORM_1) { /* max column norm */
1978       PetscReal *tmp,*tmp2;
1979       PetscInt  *jj,*garray = aij->garray;
1980       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1981       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1982       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1983       *norm = 0.0;
1984       v = amat->a; jj = amat->j;
1985       for (j=0; j<amat->nz; j++) {
1986         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1987       }
1988       v = bmat->a; jj = bmat->j;
1989       for (j=0; j<bmat->nz; j++) {
1990         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1991       }
1992       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1993       for (j=0; j<mat->cmap->N; j++) {
1994         if (tmp2[j] > *norm) *norm = tmp2[j];
1995       }
1996       ierr = PetscFree(tmp);CHKERRQ(ierr);
1997       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1998     } else if (type == NORM_INFINITY) { /* max row norm */
1999       PetscReal ntemp = 0.0;
2000       for (j=0; j<aij->A->rmap->n; j++) {
2001         v = amat->a + amat->i[j];
2002         sum = 0.0;
2003         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2004           sum += PetscAbsScalar(*v); v++;
2005         }
2006         v = bmat->a + bmat->i[j];
2007         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2008           sum += PetscAbsScalar(*v); v++;
2009         }
2010         if (sum > ntemp) ntemp = sum;
2011       }
2012       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2013     } else {
2014       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2015     }
2016   }
2017   PetscFunctionReturn(0);
2018 }
2019 
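/*
   Illustrative usage sketch (not part of the PETSc sources): computing norms of an
   MPIAIJ matrix A; NORM_2 is not supported in parallel, as the routine above reports.
   A is a placeholder.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/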
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatTranspose_MPIAIJ"
2022 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2023 {
2024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2025   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2026   PetscErrorCode ierr;
2027   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2028   PetscInt       cstart=A->cmap->rstart,ncol;
2029   Mat            B;
2030   MatScalar      *array;
2031 
2032   PetscFunctionBegin;
2033   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2034 
2035   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2036   ai = Aloc->i; aj = Aloc->j;
2037   bi = Bloc->i; bj = Bloc->j;
2038   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2039     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2040     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2041     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2042     for (i=0; i<ai[ma]; i++){
2043       d_nnz[aj[i]] ++;
2044       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2045     }
2046 
2047     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2048     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2049     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2050     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2051     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2052   } else {
2053     B = *matout;
2054   }
2055 
2056   /* copy over the A part */
2057   array = Aloc->a;
2058   row = A->rmap->rstart;
2059   for (i=0; i<ma; i++) {
2060     ncol = ai[i+1]-ai[i];
2061     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2062     row++; array += ncol; aj += ncol;
2063   }
2064   aj = Aloc->j;
2065   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2066 
2067   /* copy over the B part */
2068   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2069   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2070   array = Bloc->a;
2071   row = A->rmap->rstart;
2072   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2073   cols_tmp = cols;
2074   for (i=0; i<mb; i++) {
2075     ncol = bi[i+1]-bi[i];
2076     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2077     row++; array += ncol; cols_tmp += ncol;
2078   }
2079   ierr = PetscFree(cols);CHKERRQ(ierr);
2080 
2081   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2084     *matout = B;
2085   } else {
2086     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2087   }
2088   PetscFunctionReturn(0);
2089 }
2090 
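/*
   Illustrative usage sketch (not part of the PETSc sources): forming the explicit
   transpose of an MPIAIJ matrix A in a new matrix; in-place reuse is only allowed
   for square matrices, as checked above.  A is a placeholder.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/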
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2093 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2094 {
2095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2096   Mat            a = aij->A,b = aij->B;
2097   PetscErrorCode ierr;
2098   PetscInt       s1,s2,s3;
2099 
2100   PetscFunctionBegin;
2101   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2102   if (rr) {
2103     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2104     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2105     /* Overlap communication with computation. */
2106     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2107   }
2108   if (ll) {
2109     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2110     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2111     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2112   }
2113   /* scale the diagonal block */
2114   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2115 
2116   if (rr) {
2117     /* Do a scatter end and then right scale the off-diagonal block */
2118     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2119     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2120   }
2121 
2122   PetscFunctionReturn(0);
2123 }
2124 
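/*
   Illustrative usage sketch (not part of the PETSc sources): scaling A from the left
   by a vector l and from the right by a vector r, i.e. A <- diag(l) A diag(r); the
   vectors must conform to the row and column layouts, which MatGetVecs() provides.
   A, l and r are placeholders.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/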
2125 #undef __FUNCT__
2126 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2127 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2128 {
2129   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2134   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2135   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2136   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatEqual_MPIAIJ"
2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2154 {
2155   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2156   Mat            a,b,c,d;
2157   PetscBool      flg;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   a = matA->A; b = matA->B;
2162   c = matB->A; d = matB->B;
2163 
2164   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2165   if (flg) {
2166     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2167   }
2168   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 
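/*
   Illustrative usage sketch (not part of the PETSc sources): comparing two MPIAIJ
   matrices A and B; the result is reduced over the communicator above, so every
   process obtains the same flag.  A and B are placeholders.

     PetscBool equal;
     ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);
*/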
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatCopy_MPIAIJ"
2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2175 {
2176   PetscErrorCode ierr;
2177   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2178   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2179 
2180   PetscFunctionBegin;
2181   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2182   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2183     /* because of the column compression in the off-processor part of the matrix a->B,
2184        the number of columns in a->B and b->B may be different, hence we cannot call
2185        MatCopy() directly on the two parts; if need be, a copy more efficient than
2186        MatCopy_Basic() could be provided by first uncompressing the off-processor parts
2187        and then copying the submatrices */
2188     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2189   } else {
2190     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2191     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2192   }
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2198 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2199 {
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatAXPY_MPIAIJ"
2209 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2210 {
2211   PetscErrorCode ierr;
2212   PetscInt       i;
2213   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2214   PetscBLASInt   bnz,one=1;
2215   Mat_SeqAIJ     *x,*y;
2216 
2217   PetscFunctionBegin;
2218   if (str == SAME_NONZERO_PATTERN) {
2219     PetscScalar alpha = a;
2220     x = (Mat_SeqAIJ *)xx->A->data;
2221     y = (Mat_SeqAIJ *)yy->A->data;
2222     bnz = PetscBLASIntCast(x->nz);
2223     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224     x = (Mat_SeqAIJ *)xx->B->data;
2225     y = (Mat_SeqAIJ *)yy->B->data;
2226     bnz = PetscBLASIntCast(x->nz);
2227     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2228   } else if (str == SUBSET_NONZERO_PATTERN) {
2229     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2230 
2231     x = (Mat_SeqAIJ *)xx->B->data;
2232     y = (Mat_SeqAIJ *)yy->B->data;
2233     if (y->xtoy && y->XtoY != xx->B) {
2234       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2235       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2236     }
2237     if (!y->xtoy) { /* get xtoy */
2238       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2239       y->XtoY = xx->B;
2240       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2241     }
2242     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2243   } else {
2244     Mat B;
2245     PetscInt *nnz_d,*nnz_o;
2246     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2247     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2248     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2249     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2250     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
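/*
   Illustrative usage sketch (not part of the PETSc sources): computing Y <- Y + a*X
   for two MPIAIJ matrices; pass SAME_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN when
   it applies to hit the fast paths above, otherwise DIFFERENT_NONZERO_PATTERN rebuilds
   Y with a fresh preallocation.  X and Y are placeholders.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/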
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatConjugate_MPIAIJ"
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatRealPart_MPIAIJ"
2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285 {
2286   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2291   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2298 {
2299   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2304   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #ifdef PETSC_HAVE_PBGL
2309 
2310 #include <boost/parallel/mpi/bsp_process_group.hpp>
2311 #include <boost/graph/distributed/ilu_default_graph.hpp>
2312 #include <boost/graph/distributed/ilu_0_block.hpp>
2313 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2314 #include <boost/graph/distributed/petsc/interface.hpp>
2315 #include <boost/multi_array.hpp>
2316 #include <boost/parallel/distributed_property_map.hpp>
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2320 /*
2321   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2322 */
2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2324 {
2325   namespace petsc = boost::distributed::petsc;
2326 
2327   namespace graph_dist = boost::graph::distributed;
2328   using boost::graph::distributed::ilu_default::process_group_type;
2329   using boost::graph::ilu_permuted;
2330 
2331   PetscBool       row_identity, col_identity;
2332   PetscContainer  c;
2333   PetscInt        m, n, M, N;
2334   PetscErrorCode  ierr;
2335 
2336   PetscFunctionBegin;
2337   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2338   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2339   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2340   if (!row_identity || !col_identity) {
2341     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2342   }
2343 
2344   process_group_type pg;
2345   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2346   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2347   lgraph_type&   level_graph = *lgraph_p;
2348   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2349 
2350   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2351   ilu_permuted(level_graph);
2352 
2353   /* put together the new matrix */
2354   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2355   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2356   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2357   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2358   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2359   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361 
2362   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2363   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2364   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2365   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2372 {
2373   PetscFunctionBegin;
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatSolve_MPIAIJ"
2379 /*
2380   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2381 */
2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2383 {
2384   namespace graph_dist = boost::graph::distributed;
2385 
2386   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2387   lgraph_type*   lgraph_p;
2388   PetscContainer c;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2393   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2394   ierr = VecCopy(b, x);CHKERRQ(ierr);
2395 
2396   PetscScalar* array_x;
2397   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2398   PetscInt sx;
2399   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2400 
2401   PetscScalar* array_b;
2402   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2403   PetscInt sb;
2404   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2405 
2406   lgraph_type&   level_graph = *lgraph_p;
2407   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2408 
2409   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2410   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2411                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2412 
2413   typedef boost::iterator_property_map<array_ref_type::iterator,
2414                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2415   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2416                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2417 
2418   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2419 
2420   PetscFunctionReturn(0);
2421 }
2422 #endif
2423 
2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2425   PetscInt       nzlocal,nsends,nrecvs;
2426   PetscMPIInt    *send_rank,*recv_rank;
2427   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2428   PetscScalar    *sbuf_a,**rbuf_a;
2429   PetscErrorCode (*Destroy)(Mat);
2430 } Mat_Redundant;
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2435 {
2436   PetscErrorCode       ierr;
2437   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2438   PetscInt             i;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2442   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2443   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2444   for (i=0; i<redund->nrecvs; i++){
2445     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2446     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2447   }
2448   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2449   ierr = PetscFree(redund);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatDestroy_MatRedundant"
2455 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2456 {
2457   PetscErrorCode  ierr;
2458   PetscContainer  container;
2459   Mat_Redundant   *redund=PETSC_NULL;
2460 
2461   PetscFunctionBegin;
2462   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2463   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2464   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2465   A->ops->destroy = redund->Destroy;
2466   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2467   if (A->ops->destroy) {
2468     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2476 {
2477   PetscMPIInt    rank,size;
2478   MPI_Comm       comm=((PetscObject)mat)->comm;
2479   PetscErrorCode ierr;
2480   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2481   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2482   PetscInt       *rowrange=mat->rmap->range;
2483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2484   Mat            A=aij->A,B=aij->B,C=*matredundant;
2485   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2486   PetscScalar    *sbuf_a;
2487   PetscInt       nzlocal=a->nz+b->nz;
2488   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2489   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2490   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2491   MatScalar      *aworkA,*aworkB;
2492   PetscScalar    *vals;
2493   PetscMPIInt    tag1,tag2,tag3,imdex;
2494   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2495                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2496   MPI_Status     recv_status,*send_status;
2497   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2498   PetscInt       **rbuf_j=PETSC_NULL;
2499   PetscScalar    **rbuf_a=PETSC_NULL;
2500   Mat_Redundant  *redund=PETSC_NULL;
2501   PetscContainer container;
2502 
2503   PetscFunctionBegin;
2504   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2506 
2507   if (reuse == MAT_REUSE_MATRIX) {
2508     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2509     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2510     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2511     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2512     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2513     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2514     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2515     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2516 
2517     nsends    = redund->nsends;
2518     nrecvs    = redund->nrecvs;
2519     send_rank = redund->send_rank;
2520     recv_rank = redund->recv_rank;
2521     sbuf_nz   = redund->sbuf_nz;
2522     rbuf_nz   = redund->rbuf_nz;
2523     sbuf_j    = redund->sbuf_j;
2524     sbuf_a    = redund->sbuf_a;
2525     rbuf_j    = redund->rbuf_j;
2526     rbuf_a    = redund->rbuf_a;
2527   }
2528 
2529   if (reuse == MAT_INITIAL_MATRIX){
2530     PetscMPIInt  subrank,subsize;
2531     PetscInt     nleftover,np_subcomm;
2532     /* get the destination processors' id send_rank, nsends and nrecvs */
2533     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2534     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2535     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2536     np_subcomm = size/nsubcomm;
2537     nleftover  = size - nsubcomm*np_subcomm;
2538     nsends = 0; nrecvs = 0;
2539     for (i=0; i<size; i++){ /* i=rank*/
2540       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2541         send_rank[nsends] = i; nsends++;
2542         recv_rank[nrecvs++] = i;
2543       }
2544     }
2545     if (rank >= size - nleftover){/* this proc is a leftover processor */
2546       i = size-nleftover-1;
2547       j = 0;
2548       while (j < nsubcomm - nleftover){
2549         send_rank[nsends++] = i;
2550         i--; j++;
2551       }
2552     }
2553 
2554     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2555       for (i=0; i<nleftover; i++){
2556         recv_rank[nrecvs++] = size-nleftover+i;
2557       }
2558     }
2559 
2560     /* allocate sbuf_j, sbuf_a */
2561     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2562     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2563     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2564   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2565 
2566   /* copy mat's local entries into the buffers */
2567   if (reuse == MAT_INITIAL_MATRIX){
2568     rownz_max = 0;
2569     rptr = sbuf_j;
2570     cols = sbuf_j + rend-rstart + 1;
2571     vals = sbuf_a;
2572     rptr[0] = 0;
2573     for (i=0; i<rend-rstart; i++){
2574       row = i + rstart;
2575       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2576       ncols  = nzA + nzB;
2577       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2578       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2579       /* load the column indices for this row into cols */
2580       lwrite = 0;
2581       for (l=0; l<nzB; l++) {
2582         if ((ctmp = bmap[cworkB[l]]) < cstart){
2583           vals[lwrite]   = aworkB[l];
2584           cols[lwrite++] = ctmp;
2585         }
2586       }
2587       for (l=0; l<nzA; l++){
2588         vals[lwrite]   = aworkA[l];
2589         cols[lwrite++] = cstart + cworkA[l];
2590       }
2591       for (l=0; l<nzB; l++) {
2592         if ((ctmp = bmap[cworkB[l]]) >= cend){
2593           vals[lwrite]   = aworkB[l];
2594           cols[lwrite++] = ctmp;
2595         }
2596       }
2597       vals += ncols;
2598       cols += ncols;
2599       rptr[i+1] = rptr[i] + ncols;
2600       if (rownz_max < ncols) rownz_max = ncols;
2601     }
2602     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2603   } else { /* only copy matrix values into sbuf_a */
2604     rptr = sbuf_j;
2605     vals = sbuf_a;
2606     rptr[0] = 0;
2607     for (i=0; i<rend-rstart; i++){
2608       row = i + rstart;
2609       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2610       ncols  = nzA + nzB;
2611       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2612       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2613       lwrite = 0;
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2616       }
2617       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2618       for (l=0; l<nzB; l++) {
2619         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2620       }
2621       vals += ncols;
2622       rptr[i+1] = rptr[i] + ncols;
2623     }
2624   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2625 
2626   /* send nzlocal to others, and recv other's nzlocal */
2627   /*--------------------------------------------------*/
2628   if (reuse == MAT_INITIAL_MATRIX){
2629     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2630     s_waits2 = s_waits3 + nsends;
2631     s_waits1 = s_waits2 + nsends;
2632     r_waits1 = s_waits1 + nsends;
2633     r_waits2 = r_waits1 + nrecvs;
2634     r_waits3 = r_waits2 + nrecvs;
2635   } else {
2636     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2637     r_waits3 = s_waits3 + nsends;
2638   }
2639 
2640   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2641   if (reuse == MAT_INITIAL_MATRIX){
2642     /* get new tags to keep the communication clean */
2643     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2644     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2645     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2646 
2647     /* post receives of other's nzlocal */
2648     for (i=0; i<nrecvs; i++){
2649       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2650     }
2651     /* send nzlocal to others */
2652     for (i=0; i<nsends; i++){
2653       sbuf_nz[i] = nzlocal;
2654       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2655     }
2656     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2657     count = nrecvs;
2658     while (count) {
2659       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2660       recv_rank[imdex] = recv_status.MPI_SOURCE;
2661       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2662       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2663 
2664       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2665       rbuf_nz[imdex] += i + 2;
2666       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2667       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2668       count--;
2669     }
2670     /* wait on sends of nzlocal */
2671     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2672     /* send mat->i,j to others, and recv from others */
2673     /*------------------------------------------------*/
2674     for (i=0; i<nsends; i++){
2675       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2676       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2677     }
2678     /* wait on receives of mat->i,j */
2679     /*------------------------------*/
2680     count = nrecvs;
2681     while (count) {
2682       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2683       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684       count--;
2685     }
2686     /* wait on sends of mat->i,j */
2687     /*---------------------------*/
2688     if (nsends) {
2689       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2690     }
2691   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2692 
2693   /* post receives, send and receive mat->a */
2694   /*----------------------------------------*/
2695   for (imdex=0; imdex<nrecvs; imdex++) {
2696     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2697   }
2698   for (i=0; i<nsends; i++){
2699     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2700   }
2701   count = nrecvs;
2702   while (count) {
2703     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2704     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2705     count--;
2706   }
2707   if (nsends) {
2708     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2709   }
2710 
2711   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2712 
2713   /* create redundant matrix */
2714   /*-------------------------*/
2715   if (reuse == MAT_INITIAL_MATRIX){
2716     /* compute rownz_max for preallocation */
2717     for (imdex=0; imdex<nrecvs; imdex++){
2718       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2719       rptr = rbuf_j[imdex];
2720       for (i=0; i<j; i++){
2721         ncols = rptr[i+1] - rptr[i];
2722         if (rownz_max < ncols) rownz_max = ncols;
2723       }
2724     }
2725 
2726     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2727     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2728     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2729     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2730     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2731   } else {
2732     C = *matredundant;
2733   }
2734 
2735   /* insert local matrix entries */
2736   rptr = sbuf_j;
2737   cols = sbuf_j + rend-rstart + 1;
2738   vals = sbuf_a;
2739   for (i=0; i<rend-rstart; i++){
2740     row   = i + rstart;
2741     ncols = rptr[i+1] - rptr[i];
2742     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2743     vals += ncols;
2744     cols += ncols;
2745   }
2746   /* insert received matrix entries */
2747   for (imdex=0; imdex<nrecvs; imdex++){
2748     rstart = rowrange[recv_rank[imdex]];
2749     rend   = rowrange[recv_rank[imdex]+1];
2750     rptr = rbuf_j[imdex];
2751     cols = rbuf_j[imdex] + rend-rstart + 1;
2752     vals = rbuf_a[imdex];
2753     for (i=0; i<rend-rstart; i++){
2754       row   = i + rstart;
2755       ncols = rptr[i+1] - rptr[i];
2756       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2757       vals += ncols;
2758       cols += ncols;
2759     }
2760   }
2761   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2763   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2764   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2765   if (reuse == MAT_INITIAL_MATRIX) {
2766     PetscContainer container;
2767     *matredundant = C;
2768     /* create a supporting struct and attach it to C for reuse */
2769     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2770     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2771     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2772     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2773     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2774     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2775 
2776     redund->nzlocal = nzlocal;
2777     redund->nsends  = nsends;
2778     redund->nrecvs  = nrecvs;
2779     redund->send_rank = send_rank;
2780     redund->recv_rank = recv_rank;
2781     redund->sbuf_nz = sbuf_nz;
2782     redund->rbuf_nz = rbuf_nz;
2783     redund->sbuf_j  = sbuf_j;
2784     redund->sbuf_a  = sbuf_a;
2785     redund->rbuf_j  = rbuf_j;
2786     redund->rbuf_a  = rbuf_a;
2787 
2788     redund->Destroy = C->ops->destroy;
2789     C->ops->destroy = MatDestroy_MatRedundant;
2790   }
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2796 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2797 {
2798   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2799   PetscErrorCode ierr;
2800   PetscInt       i,*idxb = 0;
2801   PetscScalar    *va,*vb;
2802   Vec            vtmp;
2803 
2804   PetscFunctionBegin;
2805   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2806   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2807   if (idx) {
2808     for (i=0; i<A->rmap->n; i++) {
2809       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2810     }
2811   }
2812 
2813   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2814   if (idx) {
2815     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2816   }
2817   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2818   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2819 
2820   for (i=0; i<A->rmap->n; i++){
2821     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2822       va[i] = vb[i];
2823       if (idx) idx[i] = a->garray[idxb[i]];
2824     }
2825   }
2826 
2827   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2828   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2829   ierr = PetscFree(idxb);CHKERRQ(ierr);
2830   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
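/*
   A sketch of typical caller-side usage of MatGetRowMaxAbs() on an assembled MPIAIJ
   matrix A (the index array is optional and may be PETSC_NULL; the variable names
   below are illustrative):

     Vec      rowmax;
     PetscInt *loc,m;

     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&rowmax);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/
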
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2836 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2837 {
2838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2839   PetscErrorCode ierr;
2840   PetscInt       i,*idxb = 0;
2841   PetscScalar    *va,*vb;
2842   Vec            vtmp;
2843 
2844   PetscFunctionBegin;
2845   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2846   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2847   if (idx) {
2848     for (i=0; i<A->rmap->n; i++) {
2849       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2850     }
2851   }
2852 
2853   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2854   if (idx) {
2855     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2856   }
2857   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2858   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2859 
2860   for (i=0; i<A->rmap->n; i++){
2861     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2862       va[i] = vb[i];
2863       if (idx) idx[i] = a->garray[idxb[i]];
2864     }
2865   }
2866 
2867   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2868   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2869   ierr = PetscFree(idxb);CHKERRQ(ierr);
2870   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2876 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2877 {
2878   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2879   PetscInt       n      = A->rmap->n;
2880   PetscInt       cstart = A->cmap->rstart;
2881   PetscInt      *cmap   = mat->garray;
2882   PetscInt      *diagIdx, *offdiagIdx;
2883   Vec            diagV, offdiagV;
2884   PetscScalar   *a, *diagA, *offdiagA;
2885   PetscInt       r;
2886   PetscErrorCode ierr;
2887 
2888   PetscFunctionBegin;
2889   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2890   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2891   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2892   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2893   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2894   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2895   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2896   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2897   for(r = 0; r < n; ++r) {
2898     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2899       a[r]   = diagA[r];
2900       idx[r] = cstart + diagIdx[r];
2901     } else {
2902       a[r]   = offdiagA[r];
2903       idx[r] = cmap[offdiagIdx[r]];
2904     }
2905   }
2906   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2907   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2908   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2909   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2910   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2911   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 #undef __FUNCT__
2916 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2917 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2918 {
2919   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2920   PetscInt       n      = A->rmap->n;
2921   PetscInt       cstart = A->cmap->rstart;
2922   PetscInt      *cmap   = mat->garray;
2923   PetscInt      *diagIdx, *offdiagIdx;
2924   Vec            diagV, offdiagV;
2925   PetscScalar   *a, *diagA, *offdiagA;
2926   PetscInt       r;
2927   PetscErrorCode ierr;
2928 
2929   PetscFunctionBegin;
2930   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2932   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2933   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2934   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2935   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2936   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2937   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2938   for(r = 0; r < n; ++r) {
2939     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2940       a[r]   = diagA[r];
2941       idx[r] = cstart + diagIdx[r];
2942     } else {
2943       a[r]   = offdiagA[r];
2944       idx[r] = cmap[offdiagIdx[r]];
2945     }
2946   }
2947   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2948   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2949   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2950   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2951   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2952   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2958 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2959 {
2960   PetscErrorCode ierr;
2961   Mat            *dummy;
2962 
2963   PetscFunctionBegin;
2964   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2965   *newmat = *dummy;
2966   ierr = PetscFree(dummy);CHKERRQ(ierr);
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2974 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2975 {
2976   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2977   PetscErrorCode ierr;
2978 
2979   PetscFunctionBegin;
2980   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 
2985 /* -------------------------------------------------------------------*/
2986 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2987        MatGetRow_MPIAIJ,
2988        MatRestoreRow_MPIAIJ,
2989        MatMult_MPIAIJ,
2990 /* 4*/ MatMultAdd_MPIAIJ,
2991        MatMultTranspose_MPIAIJ,
2992        MatMultTransposeAdd_MPIAIJ,
2993 #ifdef PETSC_HAVE_PBGL
2994        MatSolve_MPIAIJ,
2995 #else
2996        0,
2997 #endif
2998        0,
2999        0,
3000 /*10*/ 0,
3001        0,
3002        0,
3003        MatSOR_MPIAIJ,
3004        MatTranspose_MPIAIJ,
3005 /*15*/ MatGetInfo_MPIAIJ,
3006        MatEqual_MPIAIJ,
3007        MatGetDiagonal_MPIAIJ,
3008        MatDiagonalScale_MPIAIJ,
3009        MatNorm_MPIAIJ,
3010 /*20*/ MatAssemblyBegin_MPIAIJ,
3011        MatAssemblyEnd_MPIAIJ,
3012        MatSetOption_MPIAIJ,
3013        MatZeroEntries_MPIAIJ,
3014 /*24*/ MatZeroRows_MPIAIJ,
3015        0,
3016 #ifdef PETSC_HAVE_PBGL
3017        0,
3018 #else
3019        0,
3020 #endif
3021        0,
3022        0,
3023 /*29*/ MatSetUpPreallocation_MPIAIJ,
3024 #ifdef PETSC_HAVE_PBGL
3025        0,
3026 #else
3027        0,
3028 #endif
3029        0,
3030        0,
3031        0,
3032 /*34*/ MatDuplicate_MPIAIJ,
3033        0,
3034        0,
3035        0,
3036        0,
3037 /*39*/ MatAXPY_MPIAIJ,
3038        MatGetSubMatrices_MPIAIJ,
3039        MatIncreaseOverlap_MPIAIJ,
3040        MatGetValues_MPIAIJ,
3041        MatCopy_MPIAIJ,
3042 /*44*/ MatGetRowMax_MPIAIJ,
3043        MatScale_MPIAIJ,
3044        0,
3045        0,
3046        MatZeroRowsColumns_MPIAIJ,
3047 /*49*/ MatSetBlockSize_MPIAIJ,
3048        0,
3049        0,
3050        0,
3051        0,
3052 /*54*/ MatFDColoringCreate_MPIAIJ,
3053        0,
3054        MatSetUnfactored_MPIAIJ,
3055        MatPermute_MPIAIJ,
3056        0,
3057 /*59*/ MatGetSubMatrix_MPIAIJ,
3058        MatDestroy_MPIAIJ,
3059        MatView_MPIAIJ,
3060        0,
3061        0,
3062 /*64*/ 0,
3063        0,
3064        0,
3065        0,
3066        0,
3067 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3068        MatGetRowMinAbs_MPIAIJ,
3069        0,
3070        MatSetColoring_MPIAIJ,
3071 #if defined(PETSC_HAVE_ADIC)
3072        MatSetValuesAdic_MPIAIJ,
3073 #else
3074        0,
3075 #endif
3076        MatSetValuesAdifor_MPIAIJ,
3077 /*75*/ MatFDColoringApply_AIJ,
3078        0,
3079        0,
3080        0,
3081        0,
3082 /*80*/ 0,
3083        0,
3084        0,
3085 /*83*/ MatLoad_MPIAIJ,
3086        0,
3087        0,
3088        0,
3089        0,
3090        0,
3091 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3092        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3093        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3094        MatPtAP_Basic,
3095        MatPtAPSymbolic_MPIAIJ,
3096 /*94*/ MatPtAPNumeric_MPIAIJ,
3097        0,
3098        0,
3099        0,
3100        0,
3101 /*99*/ 0,
3102        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3103        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3104        MatConjugate_MPIAIJ,
3105        0,
3106 /*104*/MatSetValuesRow_MPIAIJ,
3107        MatRealPart_MPIAIJ,
3108        MatImaginaryPart_MPIAIJ,
3109        0,
3110        0,
3111 /*109*/0,
3112        MatGetRedundantMatrix_MPIAIJ,
3113        MatGetRowMin_MPIAIJ,
3114        0,
3115        0,
3116 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3117        0,
3118        0,
3119        0,
3120        0,
3121 /*119*/0,
3122        0,
3123        0,
3124        0,
3125        MatGetMultiProcBlock_MPIAIJ,
3126 /*124*/MatFindNonZeroRows_MPIAIJ,
3127        MatGetColumnNorms_MPIAIJ,
3128        MatInvertBlockDiagonal_MPIAIJ,
3129        0,
3130        MatGetSubMatricesParallel_MPIAIJ,
3131 /*129*/0
3132 };
3133 
3134 /* ----------------------------------------------------------------------------------------*/
3135 
3136 EXTERN_C_BEGIN
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3139 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3140 {
3141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3142   PetscErrorCode ierr;
3143 
3144   PetscFunctionBegin;
3145   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3146   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3147   PetscFunctionReturn(0);
3148 }
3149 EXTERN_C_END
3150 
3151 EXTERN_C_BEGIN
3152 #undef __FUNCT__
3153 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3154 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3155 {
3156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3157   PetscErrorCode ierr;
3158 
3159   PetscFunctionBegin;
3160   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3161   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 EXTERN_C_END
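/*
   A sketch of the usual MatStoreValues()/MatRetrieveValues() calling pattern (the
   matrix is assumed assembled, and new nonzero locations are disallowed so the stored
   copy and the matrix keep the same nonzero pattern):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify entries with MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/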
3165 
3166 EXTERN_C_BEGIN
3167 #undef __FUNCT__
3168 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3169 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3170 {
3171   Mat_MPIAIJ     *b;
3172   PetscErrorCode ierr;
3173   PetscInt       i;
3174 
3175   PetscFunctionBegin;
3176   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3177   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3178   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3179   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3180 
3181   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3182   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3183   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3184   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3185   if (d_nnz) {
3186     for (i=0; i<B->rmap->n; i++) {
3187       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3188     }
3189   }
3190   if (o_nnz) {
3191     for (i=0; i<B->rmap->n; i++) {
3192       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3193     }
3194   }
3195   b = (Mat_MPIAIJ*)B->data;
3196 
3197   if (!B->preallocated) {
3198     /* Explicitly create 2 MATSEQAIJ matrices. */
3199     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3200     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3201     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3202     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3203     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3204     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3205     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3206     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3207   }
3208 
3209   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3210   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3211   B->preallocated = PETSC_TRUE;
3212   PetscFunctionReturn(0);
3213 }
3214 EXTERN_C_END
3215 
3216 #undef __FUNCT__
3217 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3218 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3219 {
3220   Mat            mat;
3221   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   *newmat       = 0;
3226   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3227   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3228   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3229   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3230   a    = (Mat_MPIAIJ*)mat->data;
3231 
3232   mat->factortype    = matin->factortype;
3233   mat->rmap->bs      = matin->rmap->bs;
3234   mat->assembled    = PETSC_TRUE;
3235   mat->insertmode   = NOT_SET_VALUES;
3236   mat->preallocated = PETSC_TRUE;
3237 
3238   a->size           = oldmat->size;
3239   a->rank           = oldmat->rank;
3240   a->donotstash     = oldmat->donotstash;
3241   a->roworiented    = oldmat->roworiented;
3242   a->rowindices     = 0;
3243   a->rowvalues      = 0;
3244   a->getrowactive   = PETSC_FALSE;
3245 
3246   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3247   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3248 
3249   if (oldmat->colmap) {
3250 #if defined (PETSC_USE_CTABLE)
3251     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3252 #else
3253     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3254     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3255     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3256 #endif
3257   } else a->colmap = 0;
3258   if (oldmat->garray) {
3259     PetscInt len;
3260     len  = oldmat->B->cmap->n;
3261     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3262     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3263     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3264   } else a->garray = 0;
3265 
3266   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3268   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3270   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3271   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3272   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3273   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3274   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3275   *newmat = mat;
3276   PetscFunctionReturn(0);
3277 }
3278 
3279 
3280 
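/*
   A sketch of the caller-side sequence that reaches MatLoad_MPIAIJ(); the file name
   and communicator are illustrative:

     PetscViewer viewer;
     Mat         A;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
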
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatLoad_MPIAIJ"
3283 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3284 {
3285   PetscScalar    *vals,*svals;
3286   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3287   PetscErrorCode ierr;
3288   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3289   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3290   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3291   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3292   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3293   int            fd;
3294 
3295   PetscFunctionBegin;
3296   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3297   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3298   if (!rank) {
3299     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3300     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3301     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3302   }
3303 
3304   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3305 
3306   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3307   M = header[1]; N = header[2];
3308   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3309   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3310   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3311 
3312   /* If global sizes are set, check if they are consistent with that given in the file */
3313   if (sizesset) {
3314     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3315   }
3316   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3317   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3318 
3319   /* determine ownership of all rows */
3320   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3321   else m = newMat->rmap->n; /* Set by user */
3322 
3323   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3324   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3325 
3326   /* First process needs enough room for process with most rows */
3327   if (!rank) {
3328     mmax       = rowners[1];
3329     for (i=2; i<=size; i++) {
3330       mmax = PetscMax(mmax,rowners[i]);
3331     }
3332   } else mmax = m;
3333 
3334   rowners[0] = 0;
3335   for (i=2; i<=size; i++) {
3336     rowners[i] += rowners[i-1];
3337   }
3338   rstart = rowners[rank];
3339   rend   = rowners[rank+1];
3340 
3341   /* distribute row lengths to all processors */
3342   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3343   if (!rank) {
3344     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3345     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3346     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3347     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3348     for (j=0; j<m; j++) {
3349       procsnz[0] += ourlens[j];
3350     }
3351     for (i=1; i<size; i++) {
3352       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3353       /* calculate the number of nonzeros on each processor */
3354       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3355         procsnz[i] += rowlengths[j];
3356       }
3357       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3358     }
3359     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3360   } else {
3361     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3362   }
3363 
3364   if (!rank) {
3365     /* determine max buffer needed and allocate it */
3366     maxnz = 0;
3367     for (i=0; i<size; i++) {
3368       maxnz = PetscMax(maxnz,procsnz[i]);
3369     }
3370     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3371 
3372     /* read in my part of the matrix column indices  */
3373     nz   = procsnz[0];
3374     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3375     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3376 
3377     /* read in everyone else's parts and ship them off */
3378     for (i=1; i<size; i++) {
3379       nz     = procsnz[i];
3380       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3381       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3382     }
3383     ierr = PetscFree(cols);CHKERRQ(ierr);
3384   } else {
3385     /* determine buffer space needed for message */
3386     nz = 0;
3387     for (i=0; i<m; i++) {
3388       nz += ourlens[i];
3389     }
3390     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3391 
3392     /* receive message of column indices*/
3393     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3394   }
3395 
3396   /* determine column ownership if matrix is not square */
3397   if (N != M) {
3398     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3399     else n = newMat->cmap->n;
3400     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3401     cstart = cend - n;
3402   } else {
3403     cstart = rstart;
3404     cend   = rend;
3405     n      = cend - cstart;
3406   }
3407 
3408   /* loop over local rows, determining number of off diagonal entries */
3409   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3410   jj = 0;
3411   for (i=0; i<m; i++) {
3412     for (j=0; j<ourlens[i]; j++) {
3413       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3414       jj++;
3415     }
3416   }
3417 
3418   for (i=0; i<m; i++) {
3419     ourlens[i] -= offlens[i];
3420   }
3421   if (!sizesset) {
3422     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3423   }
3424   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3425 
3426   for (i=0; i<m; i++) {
3427     ourlens[i] += offlens[i];
3428   }
3429 
3430   if (!rank) {
3431     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3432 
3433     /* read in my part of the matrix numerical values  */
3434     nz   = procsnz[0];
3435     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3436 
3437     /* insert into matrix */
3438     jj      = rstart;
3439     smycols = mycols;
3440     svals   = vals;
3441     for (i=0; i<m; i++) {
3442       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3443       smycols += ourlens[i];
3444       svals   += ourlens[i];
3445       jj++;
3446     }
3447 
3448     /* read in other processors and ship out */
3449     for (i=1; i<size; i++) {
3450       nz     = procsnz[i];
3451       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3452       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3453     }
3454     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3455   } else {
3456     /* receive numeric values */
3457     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3458 
3459     /* receive message of values*/
3460     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3461 
3462     /* insert into matrix */
3463     jj      = rstart;
3464     smycols = mycols;
3465     svals   = vals;
3466     for (i=0; i<m; i++) {
3467       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3468       smycols += ourlens[i];
3469       svals   += ourlens[i];
3470       jj++;
3471     }
3472   }
3473   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3474   ierr = PetscFree(vals);CHKERRQ(ierr);
3475   ierr = PetscFree(mycols);CHKERRQ(ierr);
3476   ierr = PetscFree(rowners);CHKERRQ(ierr);
3477 
3478   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3479   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 #undef __FUNCT__
3484 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3485 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3486 {
3487   PetscErrorCode ierr;
3488   IS             iscol_local;
3489   PetscInt       csize;
3490 
3491   PetscFunctionBegin;
3492   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3493   if (call == MAT_REUSE_MATRIX) {
3494     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3495     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3496   } else {
3497     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3498   }
3499   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3500   if (call == MAT_INITIAL_MATRIX) {
3501     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3502     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3503   }
3504   PetscFunctionReturn(0);
3505 }
3506 
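/*
   A sketch of caller-side usage of MatGetSubMatrix() on a square MPIAIJ matrix:
   each process lists the rows it wants to keep and the columns it should own in the
   result (here simply its own ownership range; the variable names are illustrative):

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend;

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)mat)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)mat)->comm,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/
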
3507 #undef __FUNCT__
3508 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3509 /*
3510     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3511   copy on each process, and then the end result obtained by concatenating the
3512   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3513 
3514   Note: This requires a sequential iscol with all indices.
3515 */
3516 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3517 {
3518   PetscErrorCode ierr;
3519   PetscMPIInt    rank,size;
3520   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3521   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3522   Mat            *local,M,Mreuse;
3523   MatScalar      *vwork,*aa;
3524   MPI_Comm       comm = ((PetscObject)mat)->comm;
3525   Mat_SeqAIJ     *aij;
3526 
3527 
3528   PetscFunctionBegin;
3529   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3531 
3532   if (call ==  MAT_REUSE_MATRIX) {
3533     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3534     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3535     local = &Mreuse;
3536     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3537   } else {
3538     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3539     Mreuse = *local;
3540     ierr   = PetscFree(local);CHKERRQ(ierr);
3541   }
3542 
3543   /*
3544       m - number of local rows
3545       n - number of columns (same on all processors)
3546       rstart - first row in new global matrix generated
3547   */
3548   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3549   if (call == MAT_INITIAL_MATRIX) {
3550     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3551     ii  = aij->i;
3552     jj  = aij->j;
3553 
3554     /*
3555         Determine the number of non-zeros in the diagonal and off-diagonal
3556         portions of the matrix in order to do correct preallocation
3557     */
3558 
3559     /* first get start and end of "diagonal" columns */
3560     if (csize == PETSC_DECIDE) {
3561       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3562       if (mglobal == n) { /* square matrix */
3563         nlocal = m;
3564       } else {
3565         nlocal = n/size + ((n % size) > rank);
3566       }
3567     } else {
3568       nlocal = csize;
3569     }
3570     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3571     rstart = rend - nlocal;
3572     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3573 
3574     /* next, compute all the lengths */
3575     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3576     olens = dlens + m;
3577     for (i=0; i<m; i++) {
3578       jend = ii[i+1] - ii[i];
3579       olen = 0;
3580       dlen = 0;
3581       for (j=0; j<jend; j++) {
3582         if (*jj < rstart || *jj >= rend) olen++;
3583         else dlen++;
3584         jj++;
3585       }
3586       olens[i] = olen;
3587       dlens[i] = dlen;
3588     }
3589     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3590     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3591     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3592     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3593     ierr = PetscFree(dlens);CHKERRQ(ierr);
3594   } else {
3595     PetscInt ml,nl;
3596 
3597     M = *newmat;
3598     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3599     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3600     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3601     /*
3602          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3603        rather than the slower MatSetValues().
3604     */
3605     M->was_assembled = PETSC_TRUE;
3606     M->assembled     = PETSC_FALSE;
3607   }
3608   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3609   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3610   ii  = aij->i;
3611   jj  = aij->j;
3612   aa  = aij->a;
3613   for (i=0; i<m; i++) {
3614     row   = rstart + i;
3615     nz    = ii[i+1] - ii[i];
3616     cwork = jj;     jj += nz;
3617     vwork = aa;     aa += nz;
3618     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3619   }
3620 
3621   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3623   *newmat = M;
3624 
3625   /* save submatrix used in processor for next request */
3626   if (call ==  MAT_INITIAL_MATRIX) {
3627     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3628     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3629   }
3630 
3631   PetscFunctionReturn(0);
3632 }
3633 
3634 EXTERN_C_BEGIN
3635 #undef __FUNCT__
3636 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3637 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3638 {
3639   PetscInt       m,cstart, cend,j,nnz,i,d;
3640   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3641   const PetscInt *JJ;
3642   PetscScalar    *values;
3643   PetscErrorCode ierr;
3644 
3645   PetscFunctionBegin;
3646   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3647 
3648   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3649   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3650   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3651   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3652   m      = B->rmap->n;
3653   cstart = B->cmap->rstart;
3654   cend   = B->cmap->rend;
3655   rstart = B->rmap->rstart;
3656 
3657   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3658 
3659 #if defined(PETSC_USE_DEBUG)
3660   for (i=0; i<m; i++) {
3661     nnz     = Ii[i+1]- Ii[i];
3662     JJ      = J + Ii[i];
3663     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3664     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3665     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3666   }
3667 #endif
3668 
3669   for (i=0; i<m; i++) {
3670     nnz     = Ii[i+1]- Ii[i];
3671     JJ      = J + Ii[i];
3672     nnz_max = PetscMax(nnz_max,nnz);
3673     d       = 0;
3674     for (j=0; j<nnz; j++) {
3675       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3676     }
3677     d_nnz[i] = d;
3678     o_nnz[i] = nnz - d;
3679   }
3680   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3681   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3682 
3683   if (v) values = (PetscScalar*)v;
3684   else {
3685     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3686     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3687   }
3688 
3689   for (i=0; i<m; i++) {
3690     ii   = i + rstart;
3691     nnz  = Ii[i+1]- Ii[i];
3692     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3693   }
3694   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3695   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3696 
3697   if (!v) {
3698     ierr = PetscFree(values);CHKERRQ(ierr);
3699   }
3700   PetscFunctionReturn(0);
3701 }
3702 EXTERN_C_END
3703 
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3706 /*@
3707    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3708    (the default parallel PETSc format).
3709 
3710    Collective on MPI_Comm
3711 
3712    Input Parameters:
3713 +  B - the matrix
3714 .  i - the indices into j for the start of each local row (starts with zero)
3715 .  j - the column indices for each local row (starts with zero)
3716 -  v - optional values in the matrix
3717 
3718    Level: developer
3719 
3720    Notes:
3721        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3722      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3723      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3724 
3725        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3726 
3727        The format used for the sparse matrix input is equivalent to a
3728     row-major ordering, i.e., for the following matrix, the input data expected is
3729     as shown:
3730 
3731         1 0 0
3732         2 0 3     P0
3733        -------
3734         4 5 6     P1
3735 
3736      Process0 [P0]: rows_owned=[0,1]
3737         i =  {0,1,3}  [size = nrow+1  = 2+1]
3738         j =  {0,0,2}  [size = nz = 3]
3739         v =  {1,2,3}  [size = nz = 3]
3740 
3741      Process1 [P1]: rows_owned=[2]
3742         i =  {0,3}    [size = nrow+1  = 1+1]
3743         j =  {0,1,2}  [size = nz = 3]
3744         v =  {4,5,6}  [size = nz = 3]
3745 
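   A sketch of a matching call sequence (shown for P0 only; each process passes its
   own local arrays, and the variable names are illustrative):

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve

   The last call both preallocates the matrix and inserts and assembles the values.
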
3746 .keywords: matrix, aij, compressed row, sparse, parallel
3747 
3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3749           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3750 @*/
3751 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3752 {
3753   PetscErrorCode ierr;
3754 
3755   PetscFunctionBegin;
3756   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3762 /*@C
3763    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3764    (the default parallel PETSc format).  For good matrix assembly performance
3765    the user should preallocate the matrix storage by setting the parameters
3766    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3767    performance can be increased by more than a factor of 50.
3768 
3769    Collective on MPI_Comm
3770 
3771    Input Parameters:
3772 +  A - the matrix
3773 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3774            (same value is used for all local rows)
3775 .  d_nnz - array containing the number of nonzeros in the various rows of the
3776            DIAGONAL portion of the local submatrix (possibly different for each row)
3777            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3778            The size of this array is equal to the number of local rows, i.e 'm'.
3779            You must leave room for the diagonal entry even if it is zero.
3780 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3781            submatrix (same value is used for all local rows).
3782 -  o_nnz - array containing the number of nonzeros in the various rows of the
3783            OFF-DIAGONAL portion of the local submatrix (possibly different for
3784            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3785            structure. The size of this array is equal to the number
3786            of local rows, i.e 'm'.
3787 
3788    If the *_nnz parameter is given then the *_nz parameter is ignored
3789 
3790    The AIJ format (also called the Yale sparse matrix format or
3791    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3792    storage.  The stored row and column indices begin with zero.
3793    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3794 
3795    The parallel matrix is partitioned such that the first m0 rows belong to
3796    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3797    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3798 
3799    The DIAGONAL portion of the local submatrix of a processor can be defined
3800    as the submatrix which is obtained by extracting the part corresponding to
3801    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3802    first row that belongs to the processor, r2 is the last row belonging to
3803    this processor, and c1-c2 is the range of indices of the local part of a
3804    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3805    common case of a square matrix, the row and column ranges are the same and
3806    the DIAGONAL part is also square. The remaining portion of the local
3807    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3808 
3809    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3810 
3811    You can call MatGetInfo() to get information on how effective the preallocation was;
3812    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3813    You can also run with the option -info and look for messages with the string
3814    malloc in them to see if additional memory allocation was needed.
3815 
3816    Example usage:
3817 
3818    Consider the following 8x8 matrix with 34 non-zero values, that is
3819    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3820    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3821    as follows:
3822 
3823 .vb
3824             1  2  0  |  0  3  0  |  0  4
3825     Proc0   0  5  6  |  7  0  0  |  8  0
3826             9  0 10  | 11  0  0  | 12  0
3827     -------------------------------------
3828            13  0 14  | 15 16 17  |  0  0
3829     Proc1   0 18  0  | 19 20 21  |  0  0
3830             0  0  0  | 22 23  0  | 24  0
3831     -------------------------------------
3832     Proc2  25 26 27  |  0  0 28  | 29  0
3833            30  0  0  | 31 32 33  |  0 34
3834 .ve
3835 
3836    This can be represented as a collection of submatrices as:
3837 
3838 .vb
3839       A B C
3840       D E F
3841       G H I
3842 .ve
3843 
3844    Where the submatrices A,B,C are owned by proc0, D,E,F are
3845    owned by proc1, G,H,I are owned by proc2.
3846 
3847    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3848    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3849    The 'M','N' parameters are 8,8, and have the same values on all procs.
3850 
3851    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3852    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3853    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3854    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3855    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3856    matrix, and [DF] as another SeqAIJ matrix.
3857 
3858    When d_nz, o_nz parameters are specified, d_nz storage elements are
3859    allocated for every row of the local diagonal submatrix, and o_nz
3860    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3861    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3862    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3863    In this case, the values of d_nz,o_nz are:
3864 .vb
3865      proc0 : d_nz = 2, o_nz = 2
3866      proc1 : d_nz = 3, o_nz = 2
3867      proc2 : d_nz = 1, o_nz = 4
3868 .ve
3869    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3870    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3871    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3872    34 values.
3873 
3874    When d_nnz, o_nnz parameters are specified, the storage is specified
3875    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3876    In the above case the values for d_nnz,o_nnz are:
3877 .vb
3878      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3879      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3880      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3881 .ve
3882    Here the space allocated is the sum of all the above values, i.e. 34, and
3883    hence pre-allocation is perfect.
3884 
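   A sketch of the corresponding call on proc0 for the example above (each process
   passes the arrays for its own local rows; the names are illustrative):

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
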
3885    Level: intermediate
3886 
3887 .keywords: matrix, aij, compressed row, sparse, parallel
3888 
3889 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3890           MPIAIJ, MatGetInfo()
3891 @*/
3892 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3893 {
3894   PetscErrorCode ierr;
3895 
3896   PetscFunctionBegin;
3897   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 #undef __FUNCT__
3902 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3903 /*@
3904      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3905          rows in standard CSR format.
3906 
3907    Collective on MPI_Comm
3908 
3909    Input Parameters:
3910 +  comm - MPI communicator
3911 .  m - number of local rows (Cannot be PETSC_DECIDE)
3912 .  n - This value should be the same as the local size used in creating the
3913        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3914        calculated if N is given) For square matrices n is almost always m.
3915 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3916 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3917 .   i - row indices
3918 .   j - column indices
3919 -   a - matrix values
3920 
3921    Output Parameter:
3922 .   mat - the matrix
3923 
3924    Level: intermediate
3925 
3926    Notes:
3927        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3928      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3929      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3930 
3931        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3932 
3933        The format used for the sparse matrix input is equivalent to a
3934     row-major ordering, i.e., for the following matrix, the input data expected is
3935     as shown:
3936 
3937         1 0 0
3938         2 0 3     P0
3939        -------
3940         4 5 6     P1
3941 
3942      Process0 [P0]: rows_owned=[0,1]
3943         i =  {0,1,3}  [size = nrow+1  = 2+1]
3944         j =  {0,0,2}  [size = nz = 3]
3945         v =  {1,2,3}  [size = nz = 3]
3946 
3947      Process1 [P1]: rows_owned=[2]
3948         i =  {0,3}    [size = nrow+1  = 1+1]
3949         j =  {0,1,2}  [size = nz = 3]
3950         v =  {4,5,6}  [size = nz = 3]
3951 
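   A sketch of the matching call on P0 (each process passes its own local CSR arrays;
   the names are illustrative):

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
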
3952 .keywords: matrix, aij, compressed row, sparse, parallel
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3955           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3956 @*/
3957 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3958 {
3959   PetscErrorCode ierr;
3960 
3961   PetscFunctionBegin;
3962   if (i[0]) {
3963     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3964   }
3965   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3966   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3967   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3968   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3969   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatCreateMPIAIJ"
3975 /*@C
3976    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3977    (the default parallel PETSc format).  For good matrix assembly performance
3978    the user should preallocate the matrix storage by setting the parameters
3979    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3980    performance can be increased by more than a factor of 50.
3981 
3982    Collective on MPI_Comm
3983 
3984    Input Parameters:
3985 +  comm - MPI communicator
3986 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3987            This value should be the same as the local size used in creating the
3988            y vector for the matrix-vector product y = Ax.
3989 .  n - This value should be the same as the local size used in creating the
3990        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3991        calculated if N is given) For square matrices n is almost always m.
3992 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3993 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3994 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3995            (same value is used for all local rows)
3996 .  d_nnz - array containing the number of nonzeros in the various rows of the
3997            DIAGONAL portion of the local submatrix (possibly different for each row)
3998            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3999            The size of this array is equal to the number of local rows, i.e 'm'.
4000            You must leave room for the diagonal entry even if it is zero.
4001 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4002            submatrix (same value is used for all local rows).
4003 -  o_nnz - array containing the number of nonzeros in the various rows of the
4004            OFF-DIAGONAL portion of the local submatrix (possibly different for
4005            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4006            structure. The size of this array is equal to the number
4007            of local rows, i.e 'm'.
4008 
4009    Output Parameter:
4010 .  A - the matrix
4011 
4012    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4013    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4014    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4015 
4016    Notes:
4017    If the *_nnz parameter is given then the *_nz parameter is ignored
4018 
4019    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4020    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4021    storage requirements for this matrix.
4022 
4023    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4024    processor then it must be used on all processors that share the object for
4025    that argument.
4026 
4027    The user MUST specify either the local or global matrix dimensions
4028    (possibly both).
4029 
4030    The parallel matrix is partitioned across processors such that the
4031    first m0 rows belong to process 0, the next m1 rows belong to
4032    process 1, the next m2 rows belong to process 2, etc., where
4033    m0,m1,m2,... are given by the input parameter 'm'; i.e., each processor stores
4034    values corresponding to an [m x N] submatrix.
4035 
4036    The columns are logically partitioned with the n0 columns belonging
4037    to the 0th partition, the next n1 columns belonging to the next
4038    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4039 
4040    The DIAGONAL portion of the local submatrix on any given processor
4041    is the submatrix corresponding to the rows and columns m,n
4042    owned by the given processor; i.e., the diagonal submatrix on
4043    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4044    etc. The remaining portion of the local submatrix [m x (N-n)]
4045    constitutes the OFF-DIAGONAL portion. The example below better
4046    illustrates this concept.
4047 
4048    For a square global matrix we define each processor's diagonal portion
4049    to be its local rows and the corresponding columns (a square submatrix);
4050    each processor's off-diagonal portion encompasses the remainder of the
4051    local matrix (a rectangular submatrix).
4052 
4053    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4054 
4055    When calling this routine with a single process communicator, a matrix of
4056    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4057    type of communicator, use the construction mechanism:
4058      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4059 
4060    By default, this format uses inodes (identical nodes) when possible.
4061    We search for consecutive rows with the same nonzero structure, thereby
4062    reusing matrix information to achieve increased efficiency.
4063 
4064    Options Database Keys:
4065 +  -mat_no_inode  - Do not use inodes
4066 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4067 -  -mat_aij_oneindex - Internally use indexing starting at 1
4068         rather than 0.  Note that when calling MatSetValues(),
4069         the user still MUST index entries starting at 0!
4070 
4071 
4072    Example usage:
4073 
4074    Consider the following 8x8 matrix with 34 non-zero values, that is
4075    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4076    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4077    as follows:
4078 
4079 .vb
4080             1  2  0  |  0  3  0  |  0  4
4081     Proc0   0  5  6  |  7  0  0  |  8  0
4082             9  0 10  | 11  0  0  | 12  0
4083     -------------------------------------
4084            13  0 14  | 15 16 17  |  0  0
4085     Proc1   0 18  0  | 19 20 21  |  0  0
4086             0  0  0  | 22 23  0  | 24  0
4087     -------------------------------------
4088     Proc2  25 26 27  |  0  0 28  | 29  0
4089            30  0  0  | 31 32 33  |  0 34
4090 .ve
4091 
4092    This can be represented as a collection of submatrices as:
4093 
4094 .vb
4095       A B C
4096       D E F
4097       G H I
4098 .ve
4099 
4100    Where the submatrices A,B,C are owned by proc0, D,E,F are
4101    owned by proc1, G,H,I are owned by proc2.
4102 
4103    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105    The 'M','N' parameters are 8,8, and have the same values on all procs.
4106 
4107    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4108    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4109    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4110    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4111    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4112    matrix, and [DF] as another SeqAIJ matrix.
4113 
4114    When d_nz, o_nz parameters are specified, d_nz storage elements are
4115    allocated for every row of the local diagonal submatrix, and o_nz
4116    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4117    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4118    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4119    In this case, the values of d_nz,o_nz are:
4120 .vb
4121      proc0 : d_nz = 2, o_nz = 2
4122      proc1 : d_nz = 3, o_nz = 2
4123      proc2 : d_nz = 1, o_nz = 4
4124 .ve
4125    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4126    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4127    for proc2; i.e., we are using 12+15+10=37 storage locations to store
4128    34 values.
4129 
4130    When d_nnz, o_nnz parameters are specified, the storage is specified
4131    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4132    In the above case the values for d_nnz,o_nnz are:
4133 .vb
4134      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4135      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4136      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4137 .ve
4138    Here the space allocated is the sum of all the above values, i.e., 34, and
4139    hence pre-allocation is perfect.
4140 
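   As a rough calling sketch for the example above, seen from proc0 ('comm' is assumed to be
   the communicator containing the three processes, and 'ierr' is declared elsewhere):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0 values from the example */
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Since d_nnz and o_nnz are given, the d_nz and o_nz arguments (here 0) are ignored.
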
4141    Level: intermediate
4142 
4143 .keywords: matrix, aij, compressed row, sparse, parallel
4144 
4145 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4146           MPIAIJ, MatCreateMPIAIJWithArrays()
4147 @*/
4148 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4149 {
4150   PetscErrorCode ierr;
4151   PetscMPIInt    size;
4152 
4153   PetscFunctionBegin;
4154   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4155   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4157   if (size > 1) {
4158     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4159     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4160   } else {
4161     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4162     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4163   }
4164   PetscFunctionReturn(0);
4165 }
4166 
4167 #undef __FUNCT__
4168 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4169 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4170 {
4171   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4172 
4173   PetscFunctionBegin;
4174   *Ad     = a->A;
4175   *Ao     = a->B;
4176   *colmap = a->garray;
4177   PetscFunctionReturn(0);
4178 }
4179 
4180 #undef __FUNCT__
4181 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4182 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4183 {
4184   PetscErrorCode ierr;
4185   PetscInt       i;
4186   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4187 
4188   PetscFunctionBegin;
4189   if (coloring->ctype == IS_COLORING_GLOBAL) {
4190     ISColoringValue *allcolors,*colors;
4191     ISColoring      ocoloring;
4192 
4193     /* set coloring for diagonal portion */
4194     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4195 
4196     /* set coloring for off-diagonal portion */
4197     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4198     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4199     for (i=0; i<a->B->cmap->n; i++) {
4200       colors[i] = allcolors[a->garray[i]];
4201     }
4202     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4203     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4204     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4205     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4206   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4207     ISColoringValue *colors;
4208     PetscInt        *larray;
4209     ISColoring      ocoloring;
4210 
4211     /* set coloring for diagonal portion */
4212     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4213     for (i=0; i<a->A->cmap->n; i++) {
4214       larray[i] = i + A->cmap->rstart;
4215     }
4216     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4217     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4218     for (i=0; i<a->A->cmap->n; i++) {
4219       colors[i] = coloring->colors[larray[i]];
4220     }
4221     ierr = PetscFree(larray);CHKERRQ(ierr);
4222     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4223     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4224     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4225 
4226     /* set coloring for off-diagonal portion */
4227     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4228     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4229     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4230     for (i=0; i<a->B->cmap->n; i++) {
4231       colors[i] = coloring->colors[larray[i]];
4232     }
4233     ierr = PetscFree(larray);CHKERRQ(ierr);
4234     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4235     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4236     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4237   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4238 
4239   PetscFunctionReturn(0);
4240 }
4241 
4242 #if defined(PETSC_HAVE_ADIC)
4243 #undef __FUNCT__
4244 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4245 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4246 {
4247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4248   PetscErrorCode ierr;
4249 
4250   PetscFunctionBegin;
4251   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4252   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4253   PetscFunctionReturn(0);
4254 }
4255 #endif
4256 
4257 #undef __FUNCT__
4258 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4259 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4260 {
4261   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4262   PetscErrorCode ierr;
4263 
4264   PetscFunctionBegin;
4265   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4266   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4267   PetscFunctionReturn(0);
4268 }
4269 
4270 #undef __FUNCT__
4271 #define __FUNCT__ "MatMerge"
4272 /*@
4273       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4274                  matrices from each processor
4275 
4276     Collective on MPI_Comm
4277 
4278    Input Parameters:
4279 +    comm - the communicator that the parallel matrix will live on
4280 .    inmat - the input sequential matrix on each process
4281 .    n - number of local columns (or PETSC_DECIDE)
4282 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4283 
4284    Output Parameter:
4285 .    outmat - the parallel matrix generated
4286 
4287     Level: advanced
4288 
4289    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4290 
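   A minimal usage sketch (assuming inmat is a SeqAIJ matrix already assembled on each
   process; note that this routine destroys inmat):
.vb
      Mat outmat;
      ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
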
4291 @*/
4292 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4293 {
4294   PetscErrorCode ierr;
4295   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4296   PetscInt       *indx;
4297   PetscScalar    *values;
4298 
4299   PetscFunctionBegin;
4300   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4301   if (scall == MAT_INITIAL_MATRIX){
4302     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4303     if (n == PETSC_DECIDE){
4304       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4305     }
4306     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4307     rstart -= m;
4308 
4309     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4310     for (i=0;i<m;i++) {
4311       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4312       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4313       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4314     }
4315     /* This routine will ONLY return MPIAIJ type matrix */
4316     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4317     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4318     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4319     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4320     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4321 
4322   } else if (scall == MAT_REUSE_MATRIX){
4323     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4324   } else {
4325     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4326   }
4327 
4328   for (i=0;i<m;i++) {
4329     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4330     Ii    = i + rstart;
4331     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4332     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4333   }
4334   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4335   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4336   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4337 
4338   PetscFunctionReturn(0);
4339 }
4340 
4341 #undef __FUNCT__
4342 #define __FUNCT__ "MatFileSplit"
4343 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4344 {
4345   PetscErrorCode    ierr;
4346   PetscMPIInt       rank;
4347   PetscInt          m,N,i,rstart,nnz;
4348   size_t            len;
4349   const PetscInt    *indx;
4350   PetscViewer       out;
4351   char              *name;
4352   Mat               B;
4353   const PetscScalar *values;
4354 
4355   PetscFunctionBegin;
4356   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4357   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4358   /* Should this be the type of the diagonal block of A? */
4359   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4360   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4361   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4362   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4363   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4364   for (i=0;i<m;i++) {
4365     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4366     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4367     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4368   }
4369   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4370   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4371 
4372   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4373   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4374   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4375   sprintf(name,"%s.%d",outfile,rank);
4376   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4377   ierr = PetscFree(name);CHKERRQ(ierr);
4378   ierr = MatView(B,out);CHKERRQ(ierr);
4379   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4380   ierr = MatDestroy(&B);CHKERRQ(ierr);
4381   PetscFunctionReturn(0);
4382 }
4383 
4384 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4385 #undef __FUNCT__
4386 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4387 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4388 {
4389   PetscErrorCode       ierr;
4390   Mat_Merge_SeqsToMPI  *merge;
4391   PetscContainer       container;
4392 
4393   PetscFunctionBegin;
4394   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4395   if (container) {
4396     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4397     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4398     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4399     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4400     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4401     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4402     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4403     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4404     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4405     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4406     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4407     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4408     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4409     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4410     ierr = PetscFree(merge);CHKERRQ(ierr);
4411     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4412   }
4413   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 #include <../src/mat/utils/freespace.h>
4418 #include <petscbt.h>
4419 
4420 #undef __FUNCT__
4421 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4422 /*@C
4423       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4424                  matrices from each processor
4425 
4426     Collective on MPI_Comm
4427 
4428    Input Parameters:
4429 +    comm - the communicator that the parallel matrix will live on
4430 .    seqmat - the input sequential matrix on each process
4431 .    m - number of local rows (or PETSC_DECIDE)
4432 .    n - number of local columns (or PETSC_DECIDE)
4433 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4434 
4435    Output Parameter:
4436 .    mpimat - the parallel matrix generated
4437 
4438     Level: advanced
4439 
4440    Notes:
4441      The dimensions of the sequential matrix on each processor MUST be the same.
4442      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
4443      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
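   A minimal usage sketch (assuming seqmat is the local sequential matrix on each process;
   for MAT_INITIAL_MATRIX the driver MatMerge_SeqsToMPI() runs the symbolic phase and then
   this numeric phase):
.vb
      Mat mpimat;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve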
4444 @*/
4445 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4446 {
4447   PetscErrorCode       ierr;
4448   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4449   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4450   PetscMPIInt          size,rank,taga,*len_s;
4451   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4452   PetscInt             proc,m;
4453   PetscInt             **buf_ri,**buf_rj;
4454   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4455   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4456   MPI_Request          *s_waits,*r_waits;
4457   MPI_Status           *status;
4458   MatScalar            *aa=a->a;
4459   MatScalar            **abuf_r,*ba_i;
4460   Mat_Merge_SeqsToMPI  *merge;
4461   PetscContainer       container;
4462 
4463   PetscFunctionBegin;
4464   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4465 
4466   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4467   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4468 
4469   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4470   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4471 
4472   bi     = merge->bi;
4473   bj     = merge->bj;
4474   buf_ri = merge->buf_ri;
4475   buf_rj = merge->buf_rj;
4476 
4477   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4478   owners = merge->rowmap->range;
4479   len_s  = merge->len_s;
4480 
4481   /* send and recv matrix values */
4482   /*-----------------------------*/
4483   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4484   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4485 
4486   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4487   for (proc=0,k=0; proc<size; proc++){
4488     if (!len_s[proc]) continue;
4489     i = owners[proc];
4490     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4491     k++;
4492   }
4493 
4494   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4495   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4496   ierr = PetscFree(status);CHKERRQ(ierr);
4497 
4498   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4499   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4500 
4501   /* insert mat values of mpimat */
4502   /*----------------------------*/
4503   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4504   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4505 
4506   for (k=0; k<merge->nrecv; k++){
4507     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4508     nrows = *(buf_ri_k[k]);
4509     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4510     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4511   }
4512 
4513   /* set values of ba */
4514   m = merge->rowmap->n;
4515   for (i=0; i<m; i++) {
4516     arow = owners[rank] + i;
4517     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4518     bnzi = bi[i+1] - bi[i];
4519     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4520 
4521     /* add local non-zero vals of this proc's seqmat into ba */
4522     anzi = ai[arow+1] - ai[arow];
4523     aj   = a->j + ai[arow];
4524     aa   = a->a + ai[arow];
4525     nextaj = 0;
4526     for (j=0; nextaj<anzi; j++){
4527       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4528         ba_i[j] += aa[nextaj++];
4529       }
4530     }
4531 
4532     /* add received vals into ba */
4533     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4534       /* i-th row */
4535       if (i == *nextrow[k]) {
4536         anzi = *(nextai[k]+1) - *nextai[k];
4537         aj   = buf_rj[k] + *(nextai[k]);
4538         aa   = abuf_r[k] + *(nextai[k]);
4539         nextaj = 0;
4540         for (j=0; nextaj<anzi; j++){
4541           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4542             ba_i[j] += aa[nextaj++];
4543           }
4544         }
4545         nextrow[k]++; nextai[k]++;
4546       }
4547     }
4548     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4549   }
4550   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4551   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552 
4553   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4554   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4555   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4556   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4557   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4558   PetscFunctionReturn(0);
4559 }
4560 
4561 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4562 
4563 #undef __FUNCT__
4564 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4565 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4566 {
4567   PetscErrorCode       ierr;
4568   Mat                  B_mpi;
4569   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4570   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4571   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4572   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4573   PetscInt             len,proc,*dnz,*onz;
4574   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4575   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4576   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4577   MPI_Status           *status;
4578   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4579   PetscBT              lnkbt;
4580   Mat_Merge_SeqsToMPI  *merge;
4581   PetscContainer       container;
4582 
4583   PetscFunctionBegin;
4584   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4585 
4586   /* make sure it is a PETSc comm */
4587   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4589   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4590 
4591   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4592   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4593 
4594   /* determine row ownership */
4595   /*---------------------------------------------------------*/
4596   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4597   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4598   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4599   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4600   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4601   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4602   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4603 
4604   m      = merge->rowmap->n;
4605   M      = merge->rowmap->N;
4606   owners = merge->rowmap->range;
4607 
4608   /* determine the number of messages to send, their lengths */
4609   /*---------------------------------------------------------*/
4610   len_s  = merge->len_s;
4611 
4612   len = 0;  /* length of buf_si[] */
4613   merge->nsend = 0;
4614   for (proc=0; proc<size; proc++){
4615     len_si[proc] = 0;
4616     if (proc == rank){
4617       len_s[proc] = 0;
4618     } else {
4619       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4620       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4621     }
4622     if (len_s[proc]) {
4623       merge->nsend++;
4624       nrows = 0;
4625       for (i=owners[proc]; i<owners[proc+1]; i++){
4626         if (ai[i+1] > ai[i]) nrows++;
4627       }
4628       len_si[proc] = 2*(nrows+1);
4629       len += len_si[proc];
4630     }
4631   }
4632 
4633   /* determine the number and length of messages to receive for ij-structure */
4634   /*-------------------------------------------------------------------------*/
4635   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4636   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4637 
4638   /* post the Irecv of j-structure */
4639   /*-------------------------------*/
4640   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4641   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4642 
4643   /* post the Isend of j-structure */
4644   /*--------------------------------*/
4645   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4646 
4647   for (proc=0, k=0; proc<size; proc++){
4648     if (!len_s[proc]) continue;
4649     i = owners[proc];
4650     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4651     k++;
4652   }
4653 
4654   /* receives and sends of j-structure are complete */
4655   /*------------------------------------------------*/
4656   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4657   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4658 
4659   /* send and recv i-structure */
4660   /*---------------------------*/
4661   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4662   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4663 
4664   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4665   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4666   for (proc=0,k=0; proc<size; proc++){
4667     if (!len_s[proc]) continue;
4668     /* form outgoing message for i-structure:
4669          buf_si[0]:                 nrows to be sent
4670                [1:nrows]:           row index (global)
4671                [nrows+1:2*nrows+1]: i-structure index
4672     */
4673     /*-------------------------------------------*/
4674     nrows = len_si[proc]/2 - 1;
4675     buf_si_i    = buf_si + nrows+1;
4676     buf_si[0]   = nrows;
4677     buf_si_i[0] = 0;
4678     nrows = 0;
4679     for (i=owners[proc]; i<owners[proc+1]; i++){
4680       anzi = ai[i+1] - ai[i];
4681       if (anzi) {
4682         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4683         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4684         nrows++;
4685       }
4686     }
4687     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4688     k++;
4689     buf_si += len_si[proc];
4690   }
4691 
4692   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4693   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4694 
4695   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4696   for (i=0; i<merge->nrecv; i++){
4697     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4698   }
4699 
4700   ierr = PetscFree(len_si);CHKERRQ(ierr);
4701   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4702   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4703   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4704   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4705   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4706   ierr = PetscFree(status);CHKERRQ(ierr);
4707 
4708   /* compute a local seq matrix in each processor */
4709   /*----------------------------------------------*/
4710   /* allocate bi array and free space for accumulating nonzero column info */
4711   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4712   bi[0] = 0;
4713 
4714   /* create and initialize a linked list */
4715   nlnk = N+1;
4716   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4717 
4718   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4719   len = 0;
4720   len  = ai[owners[rank+1]] - ai[owners[rank]];
4721   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4722   current_space = free_space;
4723 
4724   /* determine symbolic info for each local row */
4725   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4726 
4727   for (k=0; k<merge->nrecv; k++){
4728     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4729     nrows = *buf_ri_k[k];
4730     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4731     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4732   }
4733 
4734   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4735   len = 0;
4736   for (i=0;i<m;i++) {
4737     bnzi   = 0;
4738     /* add local non-zero cols of this proc's seqmat into lnk */
4739     arow   = owners[rank] + i;
4740     anzi   = ai[arow+1] - ai[arow];
4741     aj     = a->j + ai[arow];
4742     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4743     bnzi += nlnk;
4744     /* add received col data into lnk */
4745     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4746       if (i == *nextrow[k]) { /* i-th row */
4747         anzi = *(nextai[k]+1) - *nextai[k];
4748         aj   = buf_rj[k] + *nextai[k];
4749         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4750         bnzi += nlnk;
4751         nextrow[k]++; nextai[k]++;
4752       }
4753     }
4754     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4755 
4756     /* if free space is not available, make more free space */
4757     if (current_space->local_remaining<bnzi) {
4758       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4759       nspacedouble++;
4760     }
4761     /* copy data into free space, then initialize lnk */
4762     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4763     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4764 
4765     current_space->array           += bnzi;
4766     current_space->local_used      += bnzi;
4767     current_space->local_remaining -= bnzi;
4768 
4769     bi[i+1] = bi[i] + bnzi;
4770   }
4771 
4772   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4773 
4774   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4775   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4776   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4777 
4778   /* create symbolic parallel matrix B_mpi */
4779   /*---------------------------------------*/
4780   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4781   if (n==PETSC_DECIDE) {
4782     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4783   } else {
4784     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4785   }
4786   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4787   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4788   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4789 
4790   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4791   B_mpi->assembled     = PETSC_FALSE;
4792   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4793   merge->bi            = bi;
4794   merge->bj            = bj;
4795   merge->buf_ri        = buf_ri;
4796   merge->buf_rj        = buf_rj;
4797   merge->coi           = PETSC_NULL;
4798   merge->coj           = PETSC_NULL;
4799   merge->owners_co     = PETSC_NULL;
4800 
4801   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4802 
4803   /* attach the supporting struct to B_mpi for reuse */
4804   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4805   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4806   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4807   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4808   *mpimat = B_mpi;
4809 
4810   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4811   PetscFunctionReturn(0);
4812 }
4813 
4814 #undef __FUNCT__
4815 #define __FUNCT__ "MatMerge_SeqsToMPI"
4816 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4817 {
4818   PetscErrorCode   ierr;
4819 
4820   PetscFunctionBegin;
4821   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4822   if (scall == MAT_INITIAL_MATRIX){
4823     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4824   }
4825   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4826   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4827   PetscFunctionReturn(0);
4828 }
4829 
4830 #undef __FUNCT__
4831 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4832 /*@
4833      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4834           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4835           with MatGetSize()
4836 
4837     Not Collective
4838 
4839    Input Parameters:
4840 +    A - the matrix
4841 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4842 
4843    Output Parameter:
4844 .    A_loc - the local sequential matrix generated
4845 
4846     Level: developer
4847 
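   A minimal usage sketch (the caller destroys A_loc when it is no longer needed):
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... after the values of A change, refresh A_loc in place ... */
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
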
4848 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4849 
4850 @*/
4851 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4852 {
4853   PetscErrorCode  ierr;
4854   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4855   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4856   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4857   MatScalar       *aa=a->a,*ba=b->a,*cam;
4858   PetscScalar     *ca;
4859   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4860   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4861   PetscBool       match;
4862 
4863   PetscFunctionBegin;
4864   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4865   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4866   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4867   if (scall == MAT_INITIAL_MATRIX){
4868     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4869     ci[0] = 0;
4870     for (i=0; i<am; i++){
4871       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4872     }
4873     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4874     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4875     k = 0;
4876     for (i=0; i<am; i++) {
4877       ncols_o = bi[i+1] - bi[i];
4878       ncols_d = ai[i+1] - ai[i];
4879       /* off-diagonal portion of A */
4880       for (jo=0; jo<ncols_o; jo++) {
4881         col = cmap[*bj];
4882         if (col >= cstart) break;
4883         cj[k]   = col; bj++;
4884         ca[k++] = *ba++;
4885       }
4886       /* diagonal portion of A */
4887       for (j=0; j<ncols_d; j++) {
4888         cj[k]   = cstart + *aj++;
4889         ca[k++] = *aa++;
4890       }
4891       /* off-diagonal portion of A */
4892       for (j=jo; j<ncols_o; j++) {
4893         cj[k]   = cmap[*bj++];
4894         ca[k++] = *ba++;
4895       }
4896     }
4897     /* put together the new matrix */
4898     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4899     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4900     /* Since these are PETSc arrays, change flags to free them as necessary. */
4901     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4902     mat->free_a  = PETSC_TRUE;
4903     mat->free_ij = PETSC_TRUE;
4904     mat->nonew   = 0;
4905   } else if (scall == MAT_REUSE_MATRIX){
4906     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4907     ci = mat->i; cj = mat->j; cam = mat->a;
4908     for (i=0; i<am; i++) {
4909       /* off-diagonal portion of A */
4910       ncols_o = bi[i+1] - bi[i];
4911       for (jo=0; jo<ncols_o; jo++) {
4912         col = cmap[*bj];
4913         if (col >= cstart) break;
4914         *cam++ = *ba++; bj++;
4915       }
4916       /* diagonal portion of A */
4917       ncols_d = ai[i+1] - ai[i];
4918       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4919       /* off-diagonal portion of A */
4920       for (j=jo; j<ncols_o; j++) {
4921         *cam++ = *ba++; bj++;
4922       }
4923     }
4924   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4925   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4926   PetscFunctionReturn(0);
4927 }
4928 
4929 #undef __FUNCT__
4930 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4931 /*@C
4932      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4933 
4934     Not Collective
4935 
4936    Input Parameters:
4937 +    A - the matrix
4938 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4940 
4941    Output Parameter:
4942 .    A_loc - the local sequential matrix generated
4943 
4944     Level: developer
4945 
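   A minimal usage sketch (passing PETSC_NULL for row and col so that all local rows and
   all nonzero columns are taken):
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
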
4946 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4947 
4948 @*/
4949 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4950 {
4951   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4952   PetscErrorCode    ierr;
4953   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4954   IS                isrowa,iscola;
4955   Mat               *aloc;
4956   PetscBool       match;
4957 
4958   PetscFunctionBegin;
4959   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4960   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4961   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4962   if (!row){
4963     start = A->rmap->rstart; end = A->rmap->rend;
4964     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4965   } else {
4966     isrowa = *row;
4967   }
4968   if (!col){
4969     start = A->cmap->rstart;
4970     cmap  = a->garray;
4971     nzA   = a->A->cmap->n;
4972     nzB   = a->B->cmap->n;
4973     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4974     ncols = 0;
4975     for (i=0; i<nzB; i++) {
4976       if (cmap[i] < start) idx[ncols++] = cmap[i];
4977       else break;
4978     }
4979     imark = i;
4980     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4981     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4982     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4983   } else {
4984     iscola = *col;
4985   }
4986   if (scall != MAT_INITIAL_MATRIX){
4987     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4988     aloc[0] = *A_loc;
4989   }
4990   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4991   *A_loc = aloc[0];
4992   ierr = PetscFree(aloc);CHKERRQ(ierr);
4993   if (!row){
4994     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4995   }
4996   if (!col){
4997     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4998   }
4999   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5000   PetscFunctionReturn(0);
5001 }
5002 
5003 #undef __FUNCT__
5004 #define __FUNCT__ "MatGetBrowsOfAcols"
5005 /*@C
5006     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5007 
5008     Collective on Mat
5009 
5010    Input Parameters:
5011 +    A,B - the matrices in mpiaij format
5012 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5013 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5014 
5015    Output Parameter:
5016 +    rowb, colb - index sets of rows and columns of B to extract
5017 .    brstart - row index of B_seq at which the next B->rmap->n rows are taken from B's own local rows
5018 -    B_seq - the sequential matrix generated
5019 
5020     Level: developer
5021 
5022 @*/
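   A minimal usage sketch (the index sets and B_seq produced by the MAT_INITIAL_MATRIX call
   are passed back in on a MAT_REUSE_MATRIX call):
.vb
      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      /* ... after the values of B change ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
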
5023 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5024 {
5025   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5026   PetscErrorCode    ierr;
5027   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5028   IS                isrowb,iscolb;
5029   Mat               *bseq;
5030 
5031   PetscFunctionBegin;
5032   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5033     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5034   }
5035   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5036 
5037   if (scall == MAT_INITIAL_MATRIX){
5038     start = A->cmap->rstart;
5039     cmap  = a->garray;
5040     nzA   = a->A->cmap->n;
5041     nzB   = a->B->cmap->n;
5042     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5043     ncols = 0;
5044     for (i=0; i<nzB; i++) {  /* row < local row index */
5045       if (cmap[i] < start) idx[ncols++] = cmap[i];
5046       else break;
5047     }
5048     imark = i;
5049     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5050     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5051     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5052     *brstart = imark;
5053     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5054   } else {
5055     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5056     isrowb = *rowb; iscolb = *colb;
5057     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5058     bseq[0] = *B_seq;
5059   }
5060   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5061   *B_seq = bseq[0];
5062   ierr = PetscFree(bseq);CHKERRQ(ierr);
5063   if (!rowb){
5064     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5065   } else {
5066     *rowb = isrowb;
5067   }
5068   if (!colb){
5069     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5070   } else {
5071     *colb = iscolb;
5072   }
5073   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5074   PetscFunctionReturn(0);
5075 }
5076 
5077 #undef __FUNCT__
5078 #define __FUNCT__ "MatGetBrowsOfAoCols"
5079 /*@C
5080     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5081     of the OFF-DIAGONAL portion of local A
5082 
5083     Collective on Mat
5084 
5085    Input Parameters:
5086 +    A,B - the matrices in mpiaij format
5087 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5088 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5089 .    startsj_r - similar to startsj for receives
5090 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5091 
5092    Output Parameter:
5093 .    B_oth - the sequential matrix generated
5094 
5095     Level: developer
5096 
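   A minimal usage sketch (passing PETSC_NULL for startsj, startsj_r and bufa_ptr, which
   forces the MAT_INITIAL_MATRIX path):
.vb
      Mat B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,PETSC_NULL,&B_oth);CHKERRQ(ierr);
.ve
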
5097 @*/
5098 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5099 {
5100   VecScatter_MPI_General *gen_to,*gen_from;
5101   PetscErrorCode         ierr;
5102   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5103   Mat_SeqAIJ             *b_oth;
5104   VecScatter             ctx=a->Mvctx;
5105   MPI_Comm               comm=((PetscObject)ctx)->comm;
5106   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5107   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5108   PetscScalar            *rvalues,*svalues;
5109   MatScalar              *b_otha,*bufa,*bufA;
5110   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5111   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5112   MPI_Status             *sstatus,rstatus;
5113   PetscMPIInt            jj;
5114   PetscInt               *cols,sbs,rbs;
5115   PetscScalar            *vals;
5116 
5117   PetscFunctionBegin;
5118   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5119     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5120   }
5121   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5122   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5123 
5124   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5125   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5126   rvalues  = gen_from->values; /* holds the length of receiving row */
5127   svalues  = gen_to->values;   /* holds the length of sending row */
5128   nrecvs   = gen_from->n;
5129   nsends   = gen_to->n;
5130 
5131   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5132   srow     = gen_to->indices;   /* local row index to be sent */
5133   sstarts  = gen_to->starts;
5134   sprocs   = gen_to->procs;
5135   sstatus  = gen_to->sstatus;
5136   sbs      = gen_to->bs;
5137   rstarts  = gen_from->starts;
5138   rprocs   = gen_from->procs;
5139   rbs      = gen_from->bs;
5140 
5141   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5142   if (scall == MAT_INITIAL_MATRIX){
5143     /* i-array */
5144     /*---------*/
5145     /*  post receives */
5146     for (i=0; i<nrecvs; i++){
5147       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5148       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5149       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5150     }
5151 
5152     /* pack the outgoing message */
5153     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5154     sstartsj[0] = 0;  rstartsj[0] = 0;
5155     len = 0; /* total length of j or a array to be sent */
5156     k = 0;
5157     for (i=0; i<nsends; i++){
5158       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5159       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5160       for (j=0; j<nrows; j++) {
5161         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5162         for (l=0; l<sbs; l++){
5163           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5164           rowlen[j*sbs+l] = ncols;
5165           len += ncols;
5166           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5167         }
5168         k++;
5169       }
5170       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5171       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5172     }
5173     /* recvs and sends of i-array are completed */
5174     i = nrecvs;
5175     while (i--) {
5176       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5177     }
5178     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5179 
5180     /* allocate buffers for sending j and a arrays */
5181     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5182     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5183 
5184     /* create i-array of B_oth */
5185     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5186     b_othi[0] = 0;
5187     len = 0; /* total length of j or a array to be received */
5188     k = 0;
5189     for (i=0; i<nrecvs; i++){
5190       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5191       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5192       for (j=0; j<nrows; j++) {
5193         b_othi[k+1] = b_othi[k] + rowlen[j];
5194         len += rowlen[j]; k++;
5195       }
5196       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5197     }
5198 
5199     /* allocate space for j and a arrays of B_oth */
5200     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5201     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5202 
5203     /* j-array */
5204     /*---------*/
5205     /*  post receives of j-array */
5206     for (i=0; i<nrecvs; i++){
5207       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5208       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5209     }
5210 
5211     /* pack the outgoing message j-array */
5212     k = 0;
5213     for (i=0; i<nsends; i++){
5214       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5215       bufJ = bufj+sstartsj[i];
5216       for (j=0; j<nrows; j++) {
5217         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5218         for (ll=0; ll<sbs; ll++){
5219           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5220           for (l=0; l<ncols; l++){
5221             *bufJ++ = cols[l];
5222           }
5223           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5224         }
5225       }
5226       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5227     }
5228 
5229     /* recvs and sends of j-array are completed */
5230     i = nrecvs;
5231     while (i--) {
5232       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5233     }
5234     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5235   } else if (scall == MAT_REUSE_MATRIX){
5236     sstartsj = *startsj;
5237     rstartsj = *startsj_r;
5238     bufa     = *bufa_ptr;
5239     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5240     b_otha   = b_oth->a;
5241   } else {
5242     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5243   }
5244 
5245   /* a-array */
5246   /*---------*/
5247   /*  post receives of a-array */
5248   for (i=0; i<nrecvs; i++){
5249     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5250     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5251   }
5252 
5253   /* pack the outgoing message a-array */
5254   k = 0;
5255   for (i=0; i<nsends; i++){
5256     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5257     bufA = bufa+sstartsj[i];
5258     for (j=0; j<nrows; j++) {
5259       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5260       for (ll=0; ll<sbs; ll++){
5261         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5262         for (l=0; l<ncols; l++){
5263           *bufA++ = vals[l];
5264         }
5265         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5266       }
5267     }
5268     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5269   }
5270   /* recvs and sends of a-array are completed */
5271   i = nrecvs;
5272   while (i--) {
5273     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5274   }
5275   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5276   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5277 
5278   if (scall == MAT_INITIAL_MATRIX){
5279     /* put together the new matrix */
5280     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5281 
5282     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5283     /* Since these are PETSc arrays, change flags to free them as necessary. */
5284     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5285     b_oth->free_a  = PETSC_TRUE;
5286     b_oth->free_ij = PETSC_TRUE;
5287     b_oth->nonew   = 0;
5288 
5289     ierr = PetscFree(bufj);CHKERRQ(ierr);
5290     if (!startsj || !bufa_ptr){
5291       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5292       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5293     } else {
5294       *startsj   = sstartsj;
5295       *startsj_r = rstartsj;
5296       *bufa_ptr  = bufa;
5297     }
5298   }
5299   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5300   PetscFunctionReturn(0);
5301 }
5302 
5303 #undef __FUNCT__
5304 #define __FUNCT__ "MatGetCommunicationStructs"
5305 /*@C
5306   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5307 
5308   Not Collective
5309 
5310   Input Parameters:
5311 . A - The matrix in mpiaij format
5312 
5313   Output Parameter:
5314 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5315 . colmap - A map from global column index to local index into lvec
5316 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5317 
5318   Level: developer
5319 
5320 @*/
5321 #if defined (PETSC_USE_CTABLE)
5322 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5323 #else
5324 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5325 #endif
5326 {
5327   Mat_MPIAIJ *a;
5328 
5329   PetscFunctionBegin;
5330   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5331   PetscValidPointer(lvec, 2);
5332   PetscValidPointer(colmap, 3);
5333   PetscValidPointer(multScatter, 4);
5334   a = (Mat_MPIAIJ *) A->data;
5335   if (lvec) *lvec = a->lvec;
5336   if (colmap) *colmap = a->colmap;
5337   if (multScatter) *multScatter = a->Mvctx;
5338   PetscFunctionReturn(0);
5339 }
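
/*
   A minimal usage sketch for MatGetCommunicationStructs() (not part of the PETSc sources; the
   variable names are hypothetical and the build is assumed to be configured without
   PETSC_USE_CTABLE, so colmap is returned as a PetscInt array):

     Mat        A;       // an assembled MATMPIAIJ matrix
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   lvec is the internal work vector that receives the off-process entries of x during MatMult(),
   Mvctx is the scatter that fills it, and colmap maps global column indices to positions in lvec.
   All three are references to objects owned by the matrix and must not be destroyed by the caller.
*/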
5340 
5341 EXTERN_C_BEGIN
5342 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5343 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5344 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5345 EXTERN_C_END
5346 
5347 #undef __FUNCT__
5348 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5349 /*
5350     Computes C = A*B as (B'*A')', since computing the dense-times-sparse product A*B directly is untenable
5351 
5352                n                       p                          p
5353         (              )       (              )         (                  )
5354       m (      A       )  *  n (       B      )   =   m (         C        )
5355         (              )       (              )         (                  )
5356 
5357 */
5358 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5359 {
5360   PetscErrorCode     ierr;
5361   Mat                At,Bt,Ct;
5362 
5363   PetscFunctionBegin;
5364   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5365   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5366   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5367   ierr = MatDestroy(&At);CHKERRQ(ierr);
5368   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5369   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5370   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5371   PetscFunctionReturn(0);
5372 }
5373 
5374 #undef __FUNCT__
5375 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5376 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5377 {
5378   PetscErrorCode ierr;
5379   PetscInt       m=A->rmap->n,n=B->cmap->n;
5380   Mat            Cmat;
5381 
5382   PetscFunctionBegin;
5383   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5384   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5385   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5386   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5387   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5388   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5389   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5390   *C   = Cmat;
5391   PetscFunctionReturn(0);
5392 }
5393 
5394 /* ----------------------------------------------------------------*/
5395 #undef __FUNCT__
5396 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5397 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5398 {
5399   PetscErrorCode ierr;
5400 
5401   PetscFunctionBegin;
5402   if (scall == MAT_INITIAL_MATRIX){
5403     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5404   }
5405   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5406   PetscFunctionReturn(0);
5407 }
5408 
5409 EXTERN_C_BEGIN
5410 #if defined(PETSC_HAVE_MUMPS)
5411 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5412 #endif
5413 #if defined(PETSC_HAVE_PASTIX)
5414 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5415 #endif
5416 #if defined(PETSC_HAVE_SUPERLU_DIST)
5417 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5418 #endif
5419 #if defined(PETSC_HAVE_SPOOLES)
5420 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5421 #endif
5422 EXTERN_C_END
5423 
5424 /*MC
5425    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5426 
5427    Options Database Keys:
5428 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5429 
5430   Level: beginner
5431 
5432 .seealso: MatCreateMPIAIJ()
5433 M*/
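
/*
   A minimal creation sketch for a MATMPIAIJ matrix (illustrative only; comm, M, N, d_nz, and
   o_nz are placeholder values chosen by the user, with d_nz/o_nz the expected nonzeros per row
   in the diagonal and off-diagonal blocks):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
     // ... MatSetValues() on the locally owned rows ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/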
5434 
5435 EXTERN_C_BEGIN
5436 #undef __FUNCT__
5437 #define __FUNCT__ "MatCreate_MPIAIJ"
5438 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5439 {
5440   Mat_MPIAIJ     *b;
5441   PetscErrorCode ierr;
5442   PetscMPIInt    size;
5443 
5444   PetscFunctionBegin;
5445   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5446 
5447   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5448   B->data         = (void*)b;
5449   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5450   B->rmap->bs     = 1;
5451   B->assembled    = PETSC_FALSE;
5452 
5453   B->insertmode   = NOT_SET_VALUES;
5454   b->size         = size;
5455   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5456 
5457   /* build cache for off array entries formed */
5458   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5459   b->donotstash  = PETSC_FALSE;
5460   b->colmap      = 0;
5461   b->garray      = 0;
5462   b->roworiented = PETSC_TRUE;
5463 
5464   /* stuff used for matrix vector multiply */
5465   b->lvec      = PETSC_NULL;
5466   b->Mvctx     = PETSC_NULL;
5467 
5468   /* stuff for MatGetRow() */
5469   b->rowindices   = 0;
5470   b->rowvalues    = 0;
5471   b->getrowactive = PETSC_FALSE;
5472 
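  /* compose the type-specific callbacks that are looked up by name via PetscObjectQueryFunction():
     external direct-solver factorizations (when configured), value store/retrieve, preallocation,
     conversions, and MatMatMult with MPIDense */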
5473 #if defined(PETSC_HAVE_SPOOLES)
5474   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5475                                      "MatGetFactor_mpiaij_spooles",
5476                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5477 #endif
5478 #if defined(PETSC_HAVE_MUMPS)
5479   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5480                                      "MatGetFactor_aij_mumps",
5481                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5482 #endif
5483 #if defined(PETSC_HAVE_PASTIX)
5484   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5485                                      "MatGetFactor_mpiaij_pastix",
5486                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5487 #endif
5488 #if defined(PETSC_HAVE_SUPERLU_DIST)
5489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5490                                      "MatGetFactor_mpiaij_superlu_dist",
5491                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5492 #endif
5493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5494                                      "MatStoreValues_MPIAIJ",
5495                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5497                                      "MatRetrieveValues_MPIAIJ",
5498                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5500                                      "MatGetDiagonalBlock_MPIAIJ",
5501                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5503                                      "MatIsTranspose_MPIAIJ",
5504                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5506                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5507                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5509                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5510                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5512                                      "MatDiagonalScaleLocal_MPIAIJ",
5513                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5515                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5516                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5518                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5519                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5521                                      "MatConvert_MPIAIJ_MPISBAIJ",
5522                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5524                                      "MatMatMult_MPIDense_MPIAIJ",
5525                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5527                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5528                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5530                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5531                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5533   PetscFunctionReturn(0);
5534 }
5535 EXTERN_C_END
5536 
5537 #undef __FUNCT__
5538 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5539 /*@
5540      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5541          and "off-diagonal" part of the matrix in CSR format.
5542 
5543    Collective on MPI_Comm
5544 
5545    Input Parameters:
5546 +  comm - MPI communicator
5547 .  m - number of local rows (Cannot be PETSC_DECIDE)
5548 .  n - number of local columns. This value should be the same as the local size used in creating the
5549        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5550        calculated if N is given). For square matrices n is almost always m.
5551 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5552 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5553 .   i - row indices for "diagonal" portion of matrix
5554 .   j - column indices for "diagonal" portion of matrix
5555 .   a - matrix values for "diagonal" portion of matrix
5556 .   oi - row indices for "off-diagonal" portion of matrix
5557 .   oj - column indices for "off-diagonal" portion of matrix
5558 -   oa - matrix values for "off-diagonal" portion of matrix
5559 
5560    Output Parameter:
5561 .   mat - the matrix
5562 
5563    Level: advanced
5564 
5565    Notes:
5566        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5567        must free these arrays once the matrix has been destroyed, and not before.
5568 
5569        The i, j, oi, and oj indices are 0 based
5570 
5571        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5572 
5573        This sets local rows and cannot be used to set off-processor values.
5574 
5575        You cannot later use MatSetValues() to change values in this matrix.
5576 
5577 .keywords: matrix, aij, compressed row, sparse, parallel
5578 
5579 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5580           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5581 @*/
5582 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5583 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5584 {
5585   PetscErrorCode ierr;
5586   Mat_MPIAIJ     *maij;
5587 
5588   PetscFunctionBegin;
5589   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5590   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5591   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5592   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5593   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5594   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5595   maij = (Mat_MPIAIJ*) (*mat)->data;
5596   maij->donotstash     = PETSC_TRUE;
5597   (*mat)->preallocated = PETSC_TRUE;
5598 
5599   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5600   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5601   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5602   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5603 
5604   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5605   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5606 
5607   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5608   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5609   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5610   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5611 
5612   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5613   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5614   PetscFunctionReturn(0);
5615 }
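
/*
   A minimal usage sketch for MatCreateMPIAIJWithSplitArrays() (not from the PETSc sources; the
   arrays are hypothetical and must remain valid until the matrix is destroyed).  Two processes
   each own one row of the 2x2 matrix [2 -1; -1 2]; shown here is rank 0, which owns row 0 and
   global column 0, so its diagonal block is 1x1 and its off-diagonal block holds global column 1:

     PetscInt    i[]  = {0,1}, j[]  = {0};   PetscScalar a[]  = {2.0};    // diagonal block, local column indices
     PetscInt    oi[] = {0,1}, oj[] = {1};   PetscScalar oa[] = {-1.0};   // off-diagonal block, global column indices
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   Rank 1 is analogous, with j[] = {0} (its local column, which is global column 1) and oj[] = {0}.
*/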
5616 
5617 /*
5618     Special version for direct calls from Fortran
5619 */
5620 #include <private/fortranimpl.h>
5621 
5622 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5623 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5624 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5625 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5626 #endif
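/* otherwise the Fortran compiler is assumed to append a single trailing underscore, so the
   default name matsetvaluesmpiaij_ defined below is used unchanged */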
5627 
5628 /* Change these macros so they can be used in a void function */
5629 #undef CHKERRQ
5630 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5631 #undef SETERRQ2
5632 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5633 #undef SETERRQ
5634 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5635 
5636 EXTERN_C_BEGIN
5637 #undef __FUNCT__
5638 #define __FUNCT__ "matsetvaluesmpiaij_"
5639 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5640 {
5641   Mat             mat = *mmat;
5642   PetscInt        m = *mm, n = *mn;
5643   InsertMode      addv = *maddv;
5644   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5645   PetscScalar     value;
5646   PetscErrorCode  ierr;
5647 
5648   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5649   if (mat->insertmode == NOT_SET_VALUES) {
5650     mat->insertmode = addv;
5651   }
5652 #if defined(PETSC_USE_DEBUG)
5653   else if (mat->insertmode != addv) {
5654     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5655   }
5656 #endif
5657   {
5658   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5659   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5660   PetscBool       roworiented = aij->roworiented;
5661 
5662   /* Some Variables required in the macro */
5663   Mat             A = aij->A;
5664   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5665   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5666   MatScalar       *aa = a->a;
5667   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5668   Mat             B = aij->B;
5669   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5670   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5671   MatScalar       *ba = b->a;
5672 
5673   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5674   PetscInt        nonew = a->nonew;
5675   MatScalar       *ap1,*ap2;
5676 
5677   PetscFunctionBegin;
5678   for (i=0; i<m; i++) {
5679     if (im[i] < 0) continue;
5680 #if defined(PETSC_USE_DEBUG)
5681     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5682 #endif
5683     if (im[i] >= rstart && im[i] < rend) {
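      /* locally owned row: set up the search windows into the diagonal block A (rp1/ap1) and
         the off-diagonal block B (rp2/ap2) used by the insertion macros below */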
5684       row      = im[i] - rstart;
5685       lastcol1 = -1;
5686       rp1      = aj + ai[row];
5687       ap1      = aa + ai[row];
5688       rmax1    = aimax[row];
5689       nrow1    = ailen[row];
5690       low1     = 0;
5691       high1    = nrow1;
5692       lastcol2 = -1;
5693       rp2      = bj + bi[row];
5694       ap2      = ba + bi[row];
5695       rmax2    = bimax[row];
5696       nrow2    = bilen[row];
5697       low2     = 0;
5698       high2    = nrow2;
5699 
5700       for (j=0; j<n; j++) {
5701         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5702         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5703         if (in[j] >= cstart && in[j] < cend){
5704           col = in[j] - cstart;
5705           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5706         } else if (in[j] < 0) continue;
5707 #if defined(PETSC_USE_DEBUG)
5708         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5709 #endif
5710         else {
5711           if (mat->was_assembled) {
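            /* the matrix was assembled before, so B uses compacted column numbering; translate the
               global column through colmap, and if it is absent (and new nonzeros are allowed)
               disassemble B back to global numbering so the entry can be inserted */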
5712             if (!aij->colmap) {
5713               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5714             }
5715 #if defined (PETSC_USE_CTABLE)
5716             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5717             col--;
5718 #else
5719             col = aij->colmap[in[j]] - 1;
5720 #endif
5721             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5722               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5723               col =  in[j];
5724               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5725               B = aij->B;
5726               b = (Mat_SeqAIJ*)B->data;
5727               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5728               rp2      = bj + bi[row];
5729               ap2      = ba + bi[row];
5730               rmax2    = bimax[row];
5731               nrow2    = bilen[row];
5732               low2     = 0;
5733               high2    = nrow2;
5734               bm       = aij->B->rmap->n;
5735               ba = b->a;
5736             }
5737           } else col = in[j];
5738           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5739         }
5740       }
5741     } else {
5742       if (!aij->donotstash) {
5743         if (roworiented) {
5744           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5745         } else {
5746           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5747         }
5748       }
5749     }
5750   }}
5751   PetscFunctionReturnVoid();
5752 }
5753 EXTERN_C_END
5754 
5755