xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b17ce1af89b448f824bb09659a9de6779534cd8e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
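/*
   Example usage (added illustration, not part of the original source): a minimal
   sketch that creates an AIJ matrix and calls both preallocation routines so the
   same code works with one or many processes.  The global sizes M, N and the
   nonzero estimates (5 per diagonal-block row, 2 off-diagonal) are placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... insert values with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Only the preallocation call matching the actual matrix type takes effect; the
   other is a no-op, which is why calling both is safe.
*/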
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
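/*
   Usage sketch for the routine above (added illustration, not part of the
   original source): MatGetColumnNorms() fills a caller-provided array with one
   entry per global column on every process, which is why the Allreduce above
   communicates a buffer of global length n.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/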
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
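/*
   Usage sketch for MatDistribute_MPIAIJ() (added illustration, not part of the
   original source).  gmat is a sequential AIJ matrix whose data is only read on
   process 0, m is the number of local rows each process should own, and the
   routine is internal, so this interface may change.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... later, after changing the numerical values of gmat on process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/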
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each process
330 has an order N integer array), but access is fast.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
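/*
   Illustration of the colmap created above (added, not part of the original
   source): if the off-diagonal block B has local columns 0,1,2 corresponding to
   global columns 7,12,40 then

     garray[0] = 7,  garray[1] = 12,  garray[2] = 40

   and after CreateColmap_MPIAIJ_Private() (without PETSC_USE_CTABLE)

     colmap[7] = 1,  colmap[12] = 2,  colmap[40] = 3

   i.e. colmap stores local index + 1 so that a value of 0 can mean "global
   column not present on this process"; the CTABLE branch stores the same
   mapping in a hash table, with both key and value shifted by 1.
*/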
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
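/*
   Note on the value ordering expected by the routine above (added comment, not
   part of the original source): v must contain exactly the stored nonzeros of
   one locally owned row, in increasing global column order, i.e. first the
   off-diagonal entries left of this process's diagonal block, then the
   diagonal-block entries, then the off-diagonal entries to the right, which is
   the order the three PetscMemcpy() calls above consume them.  A minimal
   sketch, assuming global row grow is owned by this process and vals is laid
   out that way:

     ierr = MatSetValuesRow(A,grow,vals);CHKERRQ(ierr);
*/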
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal entry, which
838      would allocate new memory, causing many mallocs and slowing things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
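/*
   Usage sketch for the routine above (added illustration, not part of the
   original source): through MatZeroRows() it zeroes a set of global rows,
   optionally places diag on the diagonal, and, when x and b are provided, fixes
   the right hand side so that the zeroed unknowns keep the values given in x,
   e.g. for Dirichlet boundary conditions.  rows[] and nrows come from the
   application.

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/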
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of off process part of matrix zeroing removed columns*/
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually */
1136     /* added into yy until the next line */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* values were actually received in the Begin() but we still need to call this no-op */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: symmetric diagonal block */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190    PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
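/*
   Illustrative usage sketch (not part of this source file): how a caller reaches
   MatView_MPIAIJ_Binary() through the public API.  The file name "matrix.dat" and the
   variable names are placeholders; error handling follows the usual CHKERRQ convention.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);          for a binary viewer this dispatches to the routine above
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
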
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1521     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1522 
1523     /* copy over the A part */
1524     Aloc = (Mat_SeqAIJ*)aij->A->data;
1525     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1526     row = mat->rmap->rstart;
1527     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1528     for (i=0; i<m; i++) {
1529       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1530       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1531     }
1532     aj = Aloc->j;
1533     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1534 
1535     /* copy over the B part */
1536     Aloc = (Mat_SeqAIJ*)aij->B->data;
1537     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row  = mat->rmap->rstart;
1539     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1540     ct   = cols;
1541     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1542     for (i=0; i<m; i++) {
1543       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1544       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1545     }
1546     ierr = PetscFree(ct);CHKERRQ(ierr);
1547     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1549     /*
1550        Everyone has to call to draw the matrix since the graphics waits are
1551        synchronized across all processors that share the PetscDraw object
1552     */
1553     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1554     if (!rank) {
1555       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1556       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1557       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1558       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1559     }
1560     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(&A);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatView_MPIAIJ"
1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1569 {
1570   PetscErrorCode ierr;
1571   PetscBool      iascii,isdraw,issocket,isbinary;
1572 
1573   PetscFunctionBegin;
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1575   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1577   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1578   if (iascii || isdraw || isbinary || issocket) {
1579     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1580   } else {
1581     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
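/*
   Illustrative usage sketch (not part of this source file): requesting the detailed
   per-process ASCII summary produced by MatView_MPIAIJ_ASCIIorDraworSocket().
   Variable names are placeholders.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
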
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSOR_MPIAIJ"
1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1589 {
1590   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1591   PetscErrorCode ierr;
1592   Vec            bb1 = 0;
1593   PetscBool      hasop;
1594 
1595   PetscFunctionBegin;
1596   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* a work vector for the updated right-hand side is needed except for a single non-Eisenstat sweep with zero initial guess */
1597     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1598   }
1599 
1600   if (flag == SOR_APPLY_UPPER) {
1601     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602     PetscFunctionReturn(0);
1603   }
1604 
1605   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1606     if (flag & SOR_ZERO_INITIAL_GUESS) {
1607       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1608       its--;
1609     }
1610 
1611     while (its--) {
1612       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614 
1615       /* update rhs: bb1 = bb - B*x */
1616       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1617       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1618 
1619       /* local sweep */
1620       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1621     }
1622   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1623     if (flag & SOR_ZERO_INITIAL_GUESS) {
1624       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625       its--;
1626     }
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   }  else if (flag & SOR_EISENSTAT) {
1655     Vec         xx1;
1656 
1657     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1658     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1659 
1660     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662     if (!mat->diag) {
1663       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1664       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1665     }
1666     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1667     if (hasop) {
1668       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1669     } else {
1670       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1671     }
1672     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1673 
1674     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1675 
1676     /* local sweep */
1677     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1678     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1679     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1680   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1681 
1682   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
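/*
   Illustrative usage sketch (not part of this source file): one processor-local symmetric
   SOR sweep with zero initial guess, the flag combination used for example by PCSOR.
   The names mat, b, and x are placeholders.

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/
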
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatPermute_MPIAIJ"
1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1689 {
1690   MPI_Comm       comm;
1691   PetscInt       first,local_rowsize,local_colsize;
1692   const PetscInt *rows;
1693   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1699   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1700   /* collect the global row permutation and invert it */
1701   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1702   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1703   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1704   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1705   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1706   /* get the local target indices */
1707   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1708   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1709   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1710   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1711   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1712   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1713   /* the column permutation is so much easier;
1714      make a local version of 'colp' and invert it */
1715   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1716   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1717   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1718   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1719   /* now we just get the submatrix */
1720   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1721   /* clean up */
1722   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 
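/*
   Illustrative usage sketch (not part of this source file): calling MatPermute() with a
   distributed row permutation and a (locally known) global column permutation.  Identity
   permutations are used only to show the calling sequence; cleanup of the ISs is omitted.

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt rstart,rend,N;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);   identity row permutation
     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&colp);CHKERRQ(ierr);                   identity column permutation
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/
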
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1730 {
1731   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1732   Mat            A = mat->A,B = mat->B;
1733   PetscErrorCode ierr;
1734   PetscReal      isend[5],irecv[5];
1735 
1736   PetscFunctionBegin;
1737   info->block_size     = 1.0;
1738   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1739   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1740   isend[3] = info->memory;  isend[4] = info->mallocs;
1741   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1742   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1743   isend[3] += info->memory;  isend[4] += info->mallocs;
1744   if (flag == MAT_LOCAL) {
1745     info->nz_used      = isend[0];
1746     info->nz_allocated = isend[1];
1747     info->nz_unneeded  = isend[2];
1748     info->memory       = isend[3];
1749     info->mallocs      = isend[4];
1750   } else if (flag == MAT_GLOBAL_MAX) {
1751     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1752     info->nz_used      = irecv[0];
1753     info->nz_allocated = irecv[1];
1754     info->nz_unneeded  = irecv[2];
1755     info->memory       = irecv[3];
1756     info->mallocs      = irecv[4];
1757   } else if (flag == MAT_GLOBAL_SUM) {
1758     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1759     info->nz_used      = irecv[0];
1760     info->nz_allocated = irecv[1];
1761     info->nz_unneeded  = irecv[2];
1762     info->memory       = irecv[3];
1763     info->mallocs      = irecv[4];
1764   }
1765   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1766   info->fill_ratio_needed = 0;
1767   info->factor_mallocs    = 0;
1768 
1769   PetscFunctionReturn(0);
1770 }
1771 
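/*
   Illustrative usage sketch (not part of this source file): querying global nonzero
   statistics.  Variable names are placeholders.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
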
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSetOption_MPIAIJ"
1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1775 {
1776   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   switch (op) {
1781   case MAT_NEW_NONZERO_LOCATIONS:
1782   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1783   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1784   case MAT_KEEP_NONZERO_PATTERN:
1785   case MAT_NEW_NONZERO_LOCATION_ERR:
1786   case MAT_USE_INODES:
1787   case MAT_IGNORE_ZERO_ENTRIES:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_ROW_ORIENTED:
1792     a->roworiented = flg;
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_NEW_DIAGONALS:
1797     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1798     break;
1799   case MAT_IGNORE_OFF_PROC_ENTRIES:
1800     a->donotstash = flg;
1801     break;
1802   case MAT_SPD:
1803     A->spd_set                         = PETSC_TRUE;
1804     A->spd                             = flg;
1805     if (flg) {
1806       A->symmetric                     = PETSC_TRUE;
1807       A->structurally_symmetric        = PETSC_TRUE;
1808       A->symmetric_set                 = PETSC_TRUE;
1809       A->structurally_symmetric_set    = PETSC_TRUE;
1810     }
1811     break;
1812   case MAT_SYMMETRIC:
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     break;
1815   case MAT_STRUCTURALLY_SYMMETRIC:
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     break;
1818   case MAT_HERMITIAN:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_SYMMETRY_ETERNAL:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   default:
1825     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
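/*
   Illustrative usage sketch (not part of this source file): options commonly set on an
   MPIAIJ matrix before assembly.  Variable names are placeholders.

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);      values set are purely local, skip the stash
     ierr = MatSetOption(mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);   error out if the preallocation is exceeded
*/
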
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatGetRow_MPIAIJ"
1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1833 {
1834   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1835   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1836   PetscErrorCode ierr;
1837   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1838   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1839   PetscInt       *cmap,*idx_p;
1840 
1841   PetscFunctionBegin;
1842   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1843   mat->getrowactive = PETSC_TRUE;
1844 
1845   if (!mat->rowvalues && (idx || v)) {
1846     /*
1847         allocate enough space to hold information from the longest row.
1848     */
1849     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1850     PetscInt   max = 1,tmp;
1851     for (i=0; i<matin->rmap->n; i++) {
1852       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1853       if (max < tmp) { max = tmp; }
1854     }
1855     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1856   }
1857 
1858   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1859   lrow = row - rstart;
1860 
1861   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1862   if (!v)   {pvA = 0; pvB = 0;}
1863   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1864   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1865   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1866   nztot = nzA + nzB;
1867 
1868   cmap  = mat->garray;
1869   if (v  || idx) {
1870     if (nztot) {
1871       /* Sort by increasing column numbers, assuming A and B already sorted */
1872       PetscInt imark = -1;
1873       if (v) {
1874         *v = v_p = mat->rowvalues;
1875         for (i=0; i<nzB; i++) {
1876           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1877           else break;
1878         }
1879         imark = i;
1880         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1881         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1882       }
1883       if (idx) {
1884         *idx = idx_p = mat->rowindices;
1885         if (imark > -1) {
1886           for (i=0; i<imark; i++) {
1887             idx_p[i] = cmap[cworkB[i]];
1888           }
1889         } else {
1890           for (i=0; i<nzB; i++) {
1891             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1892             else break;
1893           }
1894           imark = i;
1895         }
1896         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1897         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1898       }
1899     } else {
1900       if (idx) *idx = 0;
1901       if (v)   *v   = 0;
1902     }
1903   }
1904   *nz = nztot;
1905   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1906   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1913 {
1914   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1915 
1916   PetscFunctionBegin;
1917   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1918   aij->getrowactive = PETSC_FALSE;
1919   PetscFunctionReturn(0);
1920 }
1921 
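/*
   Illustrative usage sketch (not part of this source file): reading the locally owned rows
   with MatGetRow()/MatRestoreRow().  Variable names are placeholders.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
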
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatNorm_MPIAIJ"
1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1925 {
1926   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1927   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1928   PetscErrorCode ierr;
1929   PetscInt       i,j,cstart = mat->cmap->rstart;
1930   PetscReal      sum = 0.0;
1931   MatScalar      *v;
1932 
1933   PetscFunctionBegin;
1934   if (aij->size == 1) {
1935     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1936   } else {
1937     if (type == NORM_FROBENIUS) {
1938       v = amat->a;
1939       for (i=0; i<amat->nz; i++) {
1940 #if defined(PETSC_USE_COMPLEX)
1941         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1942 #else
1943         sum += (*v)*(*v); v++;
1944 #endif
1945       }
1946       v = bmat->a;
1947       for (i=0; i<bmat->nz; i++) {
1948 #if defined(PETSC_USE_COMPLEX)
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950 #else
1951         sum += (*v)*(*v); v++;
1952 #endif
1953       }
1954       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1955       *norm = PetscSqrtReal(*norm);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1960       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1961       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1962       *norm = 0.0;
1963       v = amat->a; jj = amat->j;
1964       for (j=0; j<amat->nz; j++) {
1965         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1966       }
1967       v = bmat->a; jj = bmat->j;
1968       for (j=0; j<bmat->nz; j++) {
1969         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1970       }
1971       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1972       for (j=0; j<mat->cmap->N; j++) {
1973         if (tmp2[j] > *norm) *norm = tmp2[j];
1974       }
1975       ierr = PetscFree(tmp);CHKERRQ(ierr);
1976       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1992     } else {
1993       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1994     }
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
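/*
   Illustrative usage sketch (not part of this source file): the three norms supported by
   this routine.  Variable names are placeholders.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);          largest column sum of absolute values
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);   largest row sum of absolute values
*/
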
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatTranspose_MPIAIJ"
2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2002 {
2003   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2004   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2005   PetscErrorCode ierr;
2006   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2007   PetscInt       cstart=A->cmap->rstart,ncol;
2008   Mat            B;
2009   MatScalar      *array;
2010 
2011   PetscFunctionBegin;
2012   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2013 
2014   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2015   ai = Aloc->i; aj = Aloc->j;
2016   bi = Bloc->i; bj = Bloc->j;
2017   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2018     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2019     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2020     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<ai[ma]; i++){
2022       d_nnz[aj[i]] ++;
2023       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2024     }
2025 
2026     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2027     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2028     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2029     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2030     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2031     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2032   } else {
2033     B = *matout;
2034   }
2035 
2036   /* copy over the A part */
2037   array = Aloc->a;
2038   row = A->rmap->rstart;
2039   for (i=0; i<ma; i++) {
2040     ncol = ai[i+1]-ai[i];
2041     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2042     row++; array += ncol; aj += ncol;
2043   }
2044   aj = Aloc->j;
2045   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2046 
2047   /* copy over the B part */
2048   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2049   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2050   array = Bloc->a;
2051   row = A->rmap->rstart;
2052   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2053   cols_tmp = cols;
2054   for (i=0; i<mb; i++) {
2055     ncol = bi[i+1]-bi[i];
2056     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2057     row++; array += ncol; cols_tmp += ncol;
2058   }
2059   ierr = PetscFree(cols);CHKERRQ(ierr);
2060 
2061   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2063   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2064     *matout = B;
2065   } else {
2066     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
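/*
   Illustrative usage sketch (not part of this source file): out-of-place and in-place
   transposes.  Variable names are placeholders.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   creates a new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in place, square matrices only
*/
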
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2073 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2074 {
2075   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2076   Mat            a = aij->A,b = aij->B;
2077   PetscErrorCode ierr;
2078   PetscInt       s1,s2,s3;
2079 
2080   PetscFunctionBegin;
2081   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2082   if (rr) {
2083     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2084     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2085     /* Overlap communication with computation. */
2086     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2087   }
2088   if (ll) {
2089     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2090     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2091     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2092   }
2093   /* scale the diagonal block */
2094   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2095 
2096   if (rr) {
2097     /* Do a scatter end and then right scale the off-diagonal block */
2098     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2099     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2100   }
2101 
2102   PetscFunctionReturn(0);
2103 }
2104 
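/*
   Illustrative usage sketch (not part of this source file): scaling mat in place to
   diag(l)*mat*diag(r).  The vectors l and r are placeholders; either argument may be
   PETSC_NULL to skip scaling on that side.

     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
*/
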
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2107 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2108 {
2109   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2110   PetscErrorCode ierr;
2111 
2112   PetscFunctionBegin;
2113   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2114   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2115   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2116   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2121 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2122 {
2123   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2124   PetscErrorCode ierr;
2125 
2126   PetscFunctionBegin;
2127   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatEqual_MPIAIJ"
2133 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2134 {
2135   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2136   Mat            a,b,c,d;
2137   PetscBool      flg;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   a = matA->A; b = matA->B;
2142   c = matB->A; d = matB->B;
2143 
2144   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2145   if (flg) {
2146     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2147   }
2148   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
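/*
   Illustrative usage sketch (not part of this source file): collective comparison of two
   matrices; the result is identical on all processes.  Variable names are placeholders.

     PetscBool eq;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
*/
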
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatCopy_MPIAIJ"
2154 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2155 {
2156   PetscErrorCode ierr;
2157   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2158   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2159 
2160   PetscFunctionBegin;
2161   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2162   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2163     /* because of the column compression in the off-processor part of the matrix a->B,
2164        the number of columns in a->B and b->B may be different, hence we cannot call
2165        the MatCopy() directly on the two parts. If need be, we can provide a more
2166        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2167        then copying the submatrices */
2168     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2169   } else {
2170     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2171     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
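/*
   Illustrative usage sketch (not part of this source file): duplicating a matrix and then
   copying values with a matching nonzero pattern.  Variable names are placeholders.

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
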
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatSetUp_MPIAIJ"
2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2179 {
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatAXPY_MPIAIJ"
2189 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2190 {
2191   PetscErrorCode ierr;
2192   PetscInt       i;
2193   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2194   PetscBLASInt   bnz,one=1;
2195   Mat_SeqAIJ     *x,*y;
2196 
2197   PetscFunctionBegin;
2198   if (str == SAME_NONZERO_PATTERN) {
2199     PetscScalar alpha = a;
2200     x = (Mat_SeqAIJ *)xx->A->data;
2201     y = (Mat_SeqAIJ *)yy->A->data;
2202     bnz = PetscBLASIntCast(x->nz);
2203     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2204     x = (Mat_SeqAIJ *)xx->B->data;
2205     y = (Mat_SeqAIJ *)yy->B->data;
2206     bnz = PetscBLASIntCast(x->nz);
2207     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2208   } else if (str == SUBSET_NONZERO_PATTERN) {
2209     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2210 
2211     x = (Mat_SeqAIJ *)xx->B->data;
2212     y = (Mat_SeqAIJ *)yy->B->data;
2213     if (y->xtoy && y->XtoY != xx->B) {
2214       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2215       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2216     }
2217     if (!y->xtoy) { /* get xtoy */
2218       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2219       y->XtoY = xx->B;
2220       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2221     }
2222     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2223   } else {
2224     Mat B;
2225     PetscInt *nnz_d,*nnz_o;
2226     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2227     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2228     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2229     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2230     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2231     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2232     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2233     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2234     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2235     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2236     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2237     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2238     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2239   }
2240   PetscFunctionReturn(0);
2241 }
2242 
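/*
   Illustrative usage sketch (not part of this source file): Y = Y + alpha*X with different
   structure hints.  Variable names are placeholders.

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        fastest path, BLAS axpy on the stored values
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   Y is rebuilt with the union nonzero pattern
*/
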
2243 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatConjugate_MPIAIJ"
2247 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2248 {
2249 #if defined(PETSC_USE_COMPLEX)
2250   PetscErrorCode ierr;
2251   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2252 
2253   PetscFunctionBegin;
2254   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2255   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2256 #else
2257   PetscFunctionBegin;
2258 #endif
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 #undef __FUNCT__
2263 #define __FUNCT__ "MatRealPart_MPIAIJ"
2264 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2265 {
2266   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2267   PetscErrorCode ierr;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2271   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 #undef __FUNCT__
2276 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2277 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2278 {
2279   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2284   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #ifdef PETSC_HAVE_PBGL
2289 
2290 #include <boost/parallel/mpi/bsp_process_group.hpp>
2291 #include <boost/graph/distributed/ilu_default_graph.hpp>
2292 #include <boost/graph/distributed/ilu_0_block.hpp>
2293 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2294 #include <boost/graph/distributed/petsc/interface.hpp>
2295 #include <boost/multi_array.hpp>
2296 #include <boost/parallel/distributed_property_map.hpp>
2297 
2298 #undef __FUNCT__
2299 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2300 /*
2301   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2302 */
2303 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2304 {
2305   namespace petsc = boost::distributed::petsc;
2306 
2307   namespace graph_dist = boost::graph::distributed;
2308   using boost::graph::distributed::ilu_default::process_group_type;
2309   using boost::graph::ilu_permuted;
2310 
2311   PetscBool       row_identity, col_identity;
2312   PetscContainer  c;
2313   PetscInt        m, n, M, N;
2314   PetscErrorCode  ierr;
2315 
2316   PetscFunctionBegin;
2317   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2318   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2319   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2320   if (!row_identity || !col_identity) {
2321     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2322   }
2323 
2324   process_group_type pg;
2325   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2326   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2327   lgraph_type&   level_graph = *lgraph_p;
2328   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2329 
2330   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2331   ilu_permuted(level_graph);
2332 
2333   /* put together the new matrix */
2334   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2335   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2336   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2337   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2338   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2339   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341 
2342   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2343   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2344   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2345   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2352 {
2353   PetscFunctionBegin;
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatSolve_MPIAIJ"
2359 /*
2360   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2361 */
2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2363 {
2364   namespace graph_dist = boost::graph::distributed;
2365 
2366   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2367   lgraph_type*   lgraph_p;
2368   PetscContainer c;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2373   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2374   ierr = VecCopy(b, x);CHKERRQ(ierr);
2375 
2376   PetscScalar* array_x;
2377   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2378   PetscInt sx;
2379   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2380 
2381   PetscScalar* array_b;
2382   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2383   PetscInt sb;
2384   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2385 
2386   lgraph_type&   level_graph = *lgraph_p;
2387   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2388 
2389   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2390   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2391                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2392 
2393   typedef boost::iterator_property_map<array_ref_type::iterator,
2394                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2395   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2396                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2397 
2398   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2399 
2400   PetscFunctionReturn(0);
2401 }
2402 #endif
2403 
2404 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2405   PetscInt       nzlocal,nsends,nrecvs;
2406   PetscMPIInt    *send_rank,*recv_rank;
2407   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2408   PetscScalar    *sbuf_a,**rbuf_a;
2409   PetscErrorCode (*Destroy)(Mat);
2410 } Mat_Redundant;
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2414 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2415 {
2416   PetscErrorCode       ierr;
2417   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2418   PetscInt             i;
2419 
2420   PetscFunctionBegin;
2421   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2422   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2423   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2424   for (i=0; i<redund->nrecvs; i++){
2425     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2426     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2427   }
2428   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2429   ierr = PetscFree(redund);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatDestroy_MatRedundant"
2435 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2436 {
2437   PetscErrorCode  ierr;
2438   PetscContainer  container;
2439   Mat_Redundant   *redund=PETSC_NULL;
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2443   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2444   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2445   A->ops->destroy = redund->Destroy;
2446   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2447   if (A->ops->destroy) {
2448     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2449   }
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2455 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2456 {
2457   PetscMPIInt    rank,size;
2458   MPI_Comm       comm=((PetscObject)mat)->comm;
2459   PetscErrorCode ierr;
2460   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2461   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2462   PetscInt       *rowrange=mat->rmap->range;
2463   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2464   Mat            A=aij->A,B=aij->B,C=*matredundant;
2465   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2466   PetscScalar    *sbuf_a;
2467   PetscInt       nzlocal=a->nz+b->nz;
2468   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2469   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2470   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2471   MatScalar      *aworkA,*aworkB;
2472   PetscScalar    *vals;
2473   PetscMPIInt    tag1,tag2,tag3,imdex;
2474   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2475                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2476   MPI_Status     recv_status,*send_status;
2477   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2478   PetscInt       **rbuf_j=PETSC_NULL;
2479   PetscScalar    **rbuf_a=PETSC_NULL;
2480   Mat_Redundant  *redund=PETSC_NULL;
2481   PetscContainer container;
2482 
2483   PetscFunctionBegin;
2484   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2485   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2486 
2487   if (reuse == MAT_REUSE_MATRIX) {
2488     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2489     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2490     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2491     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2492     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2493     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2494     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2495     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2496 
2497     nsends    = redund->nsends;
2498     nrecvs    = redund->nrecvs;
2499     send_rank = redund->send_rank;
2500     recv_rank = redund->recv_rank;
2501     sbuf_nz   = redund->sbuf_nz;
2502     rbuf_nz   = redund->rbuf_nz;
2503     sbuf_j    = redund->sbuf_j;
2504     sbuf_a    = redund->sbuf_a;
2505     rbuf_j    = redund->rbuf_j;
2506     rbuf_a    = redund->rbuf_a;
2507   }
2508 
2509   if (reuse == MAT_INITIAL_MATRIX){
2510     PetscMPIInt  subrank,subsize;
2511     PetscInt     nleftover,np_subcomm;
2512     /* get the destination processors' id send_rank, nsends and nrecvs */
2513     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2514     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2515     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2516     np_subcomm = size/nsubcomm;
2517     nleftover  = size - nsubcomm*np_subcomm;
2518     nsends = 0; nrecvs = 0;
2519     for (i=0; i<size; i++){ /* i = rank */
2520       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2521         send_rank[nsends] = i; nsends++;
2522         recv_rank[nrecvs++] = i;
2523       }
2524     }
2525     if (rank >= size - nleftover){/* this proc is a leftover processor */
2526       i = size-nleftover-1;
2527       j = 0;
2528       while (j < nsubcomm - nleftover){
2529         send_rank[nsends++] = i;
2530         i--; j++;
2531       }
2532     }
2533 
2534     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2535       for (i=0; i<nleftover; i++){
2536         recv_rank[nrecvs++] = size-nleftover+i;
2537       }
2538     }
2539 
2540     /* allocate sbuf_j, sbuf_a */
2541     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2542     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2543     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2544   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2545 
2546   /* copy mat's local entries into the buffers */
2547   if (reuse == MAT_INITIAL_MATRIX){
2548     rownz_max = 0;
2549     rptr = sbuf_j;
2550     cols = sbuf_j + rend-rstart + 1;
2551     vals = sbuf_a;
2552     rptr[0] = 0;
2553     for (i=0; i<rend-rstart; i++){
2554       row = i + rstart;
2555       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2556       ncols  = nzA + nzB;
2557       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2558       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2559       /* load the column indices for this row into cols */
2560       lwrite = 0;
2561       for (l=0; l<nzB; l++) {
2562         if ((ctmp = bmap[cworkB[l]]) < cstart){
2563           vals[lwrite]   = aworkB[l];
2564           cols[lwrite++] = ctmp;
2565         }
2566       }
2567       for (l=0; l<nzA; l++){
2568         vals[lwrite]   = aworkA[l];
2569         cols[lwrite++] = cstart + cworkA[l];
2570       }
2571       for (l=0; l<nzB; l++) {
2572         if ((ctmp = bmap[cworkB[l]]) >= cend){
2573           vals[lwrite]   = aworkB[l];
2574           cols[lwrite++] = ctmp;
2575         }
2576       }
2577       vals += ncols;
2578       cols += ncols;
2579       rptr[i+1] = rptr[i] + ncols;
2580       if (rownz_max < ncols) rownz_max = ncols;
2581     }
2582     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2583   } else { /* only copy matrix values into sbuf_a */
2584     rptr = sbuf_j;
2585     vals = sbuf_a;
2586     rptr[0] = 0;
2587     for (i=0; i<rend-rstart; i++){
2588       row = i + rstart;
2589       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2590       ncols  = nzA + nzB;
2591       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2592       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2593       lwrite = 0;
2594       for (l=0; l<nzB; l++) {
2595         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2596       }
2597       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2598       for (l=0; l<nzB; l++) {
2599         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2600       }
2601       vals += ncols;
2602       rptr[i+1] = rptr[i] + ncols;
2603     }
2604   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2605 
2606   /* send nzlocal to others, and recv other's nzlocal */
2607   /*--------------------------------------------------*/
2608   if (reuse == MAT_INITIAL_MATRIX){
2609     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2610     s_waits2 = s_waits3 + nsends;
2611     s_waits1 = s_waits2 + nsends;
2612     r_waits1 = s_waits1 + nsends;
2613     r_waits2 = r_waits1 + nrecvs;
2614     r_waits3 = r_waits2 + nrecvs;
2615   } else {
2616     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2617     r_waits3 = s_waits3 + nsends;
2618   }
2619 
2620   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2621   if (reuse == MAT_INITIAL_MATRIX){
2622     /* get new tags to keep the communication clean */
2623     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2624     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2625     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2626 
2627     /* post receives of other's nzlocal */
2628     for (i=0; i<nrecvs; i++){
2629       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2630     }
2631     /* send nzlocal to others */
2632     for (i=0; i<nsends; i++){
2633       sbuf_nz[i] = nzlocal;
2634       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2635     }
2636     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2637     count = nrecvs;
2638     while (count) {
2639       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2640       recv_rank[imdex] = recv_status.MPI_SOURCE;
2641       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2642       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2643 
2644       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2645       rbuf_nz[imdex] += i + 2;
2646       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2647       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2648       count--;
2649     }
2650     /* wait on sends of nzlocal */
2651     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2652     /* send mat->i,j to others, and recv from other's */
2653     /*------------------------------------------------*/
2654     for (i=0; i<nsends; i++){
2655       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2656       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2657     }
2658     /* wait on receives of mat->i,j */
2659     /*------------------------------*/
2660     count = nrecvs;
2661     while (count) {
2662       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2663       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2664       count--;
2665     }
2666     /* wait on sends of mat->i,j */
2667     /*---------------------------*/
2668     if (nsends) {
2669       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2670     }
2671   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2672 
2673   /* post receives, send and receive mat->a */
2674   /*----------------------------------------*/
2675   for (imdex=0; imdex<nrecvs; imdex++) {
2676     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2677   }
2678   for (i=0; i<nsends; i++){
2679     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2680   }
2681   count = nrecvs;
2682   while (count) {
2683     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2684     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2685     count--;
2686   }
2687   if (nsends) {
2688     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2689   }
2690 
2691   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2692 
2693   /* create redundant matrix */
2694   /*-------------------------*/
2695   if (reuse == MAT_INITIAL_MATRIX){
2696     /* compute rownz_max for preallocation */
2697     for (imdex=0; imdex<nrecvs; imdex++){
2698       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2699       rptr = rbuf_j[imdex];
2700       for (i=0; i<j; i++){
2701         ncols = rptr[i+1] - rptr[i];
2702         if (rownz_max < ncols) rownz_max = ncols;
2703       }
2704     }
2705 
2706     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2707     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2708     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2709     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2710     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2711   } else {
2712     C = *matredundant;
2713   }
2714 
2715   /* insert local matrix entries */
2716   rptr = sbuf_j;
2717   cols = sbuf_j + rend-rstart + 1;
2718   vals = sbuf_a;
2719   for (i=0; i<rend-rstart; i++){
2720     row   = i + rstart;
2721     ncols = rptr[i+1] - rptr[i];
2722     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2723     vals += ncols;
2724     cols += ncols;
2725   }
2726   /* insert received matrix entries */
2727   for (imdex=0; imdex<nrecvs; imdex++){
2728     rstart = rowrange[recv_rank[imdex]];
2729     rend   = rowrange[recv_rank[imdex]+1];
2730     rptr = rbuf_j[imdex];
2731     cols = rbuf_j[imdex] + rend-rstart + 1;
2732     vals = rbuf_a[imdex];
2733     for (i=0; i<rend-rstart; i++){
2734       row   = i + rstart;
2735       ncols = rptr[i+1] - rptr[i];
2736       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2737       vals += ncols;
2738       cols += ncols;
2739     }
2740   }
2741   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2742   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2743   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2744   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2745   if (reuse == MAT_INITIAL_MATRIX) {
2746     PetscContainer container;
2747     *matredundant = C;
2748     /* create a supporting struct and attach it to C for reuse */
2749     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2750     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2751     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2752     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2753     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2754     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2755 
2756     redund->nzlocal = nzlocal;
2757     redund->nsends  = nsends;
2758     redund->nrecvs  = nrecvs;
2759     redund->send_rank = send_rank;
2760     redund->recv_rank = recv_rank;
2761     redund->sbuf_nz = sbuf_nz;
2762     redund->rbuf_nz = rbuf_nz;
2763     redund->sbuf_j  = sbuf_j;
2764     redund->sbuf_a  = sbuf_a;
2765     redund->rbuf_j  = rbuf_j;
2766     redund->rbuf_a  = rbuf_a;
2767 
2768     redund->Destroy = C->ops->destroy;
2769     C->ops->destroy = MatDestroy_MatRedundant;
2770   }
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2776 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2777 {
2778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2779   PetscErrorCode ierr;
2780   PetscInt       i,*idxb = 0;
2781   PetscScalar    *va,*vb;
2782   Vec            vtmp;
2783 
2784   PetscFunctionBegin;
2785   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2786   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2787   if (idx) {
2788     for (i=0; i<A->rmap->n; i++) {
2789       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2790     }
2791   }
2792 
2793   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2794   if (idx) {
2795     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2796   }
2797   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2798   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2799 
2800   for (i=0; i<A->rmap->n; i++){
2801     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2802       va[i] = vb[i];
2803       if (idx) idx[i] = a->garray[idxb[i]];
2804     }
2805   }
2806 
2807   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2808   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2809   ierr = PetscFree(idxb);CHKERRQ(ierr);
2810   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2816 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2817 {
2818   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2819   PetscErrorCode ierr;
2820   PetscInt       i,*idxb = 0;
2821   PetscScalar    *va,*vb;
2822   Vec            vtmp;
2823 
2824   PetscFunctionBegin;
2825   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2826   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2827   if (idx) {
2828     for (i=0; i<A->rmap->n; i++) {
2829       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2830     }
2831   }
2832 
2833   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2834   if (idx) {
2835     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2836   }
2837   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2838   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2839 
2840   for (i=0; i<A->rmap->n; i++){
2841     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2842       va[i] = vb[i];
2843       if (idx) idx[i] = a->garray[idxb[i]];
2844     }
2845   }
2846 
2847   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2848   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2849   ierr = PetscFree(idxb);CHKERRQ(ierr);
2850   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2856 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2857 {
2858   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2859   PetscInt       n      = A->rmap->n;
2860   PetscInt       cstart = A->cmap->rstart;
2861   PetscInt      *cmap   = mat->garray;
2862   PetscInt      *diagIdx, *offdiagIdx;
2863   Vec            diagV, offdiagV;
2864   PetscScalar   *a, *diagA, *offdiagA;
2865   PetscInt       r;
2866   PetscErrorCode ierr;
2867 
2868   PetscFunctionBegin;
2869   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2870   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2871   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2872   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2873   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2874   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2875   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2876   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2877   for(r = 0; r < n; ++r) {
2878     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2879       a[r]   = diagA[r];
2880       idx[r] = cstart + diagIdx[r];
2881     } else {
2882       a[r]   = offdiagA[r];
2883       idx[r] = cmap[offdiagIdx[r]];
2884     }
2885   }
2886   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2887   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2888   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2889   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2890   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2891   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2892   PetscFunctionReturn(0);
2893 }
2894 
2895 #undef __FUNCT__
2896 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2897 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2898 {
2899   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2900   PetscInt       n      = A->rmap->n;
2901   PetscInt       cstart = A->cmap->rstart;
2902   PetscInt      *cmap   = mat->garray;
2903   PetscInt      *diagIdx, *offdiagIdx;
2904   Vec            diagV, offdiagV;
2905   PetscScalar   *a, *diagA, *offdiagA;
2906   PetscInt       r;
2907   PetscErrorCode ierr;
2908 
2909   PetscFunctionBegin;
2910   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2911   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2912   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2913   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2914   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2915   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2916   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2917   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2918   for(r = 0; r < n; ++r) {
2919     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2920       a[r]   = diagA[r];
2921       idx[r] = cstart + diagIdx[r];
2922     } else {
2923       a[r]   = offdiagA[r];
2924       idx[r] = cmap[offdiagIdx[r]];
2925     }
2926   }
2927   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2928   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2929   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2930   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2931   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2932   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 #undef __FUNCT__
2937 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2938 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2939 {
2940   PetscErrorCode ierr;
2941   Mat            *dummy;
2942 
2943   PetscFunctionBegin;
2944   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2945   *newmat = *dummy;
2946   ierr = PetscFree(dummy);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2954 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2955 {
2956   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2957   PetscErrorCode ierr;
2958 
2959   PetscFunctionBegin;
2960   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 
2965 /* -------------------------------------------------------------------*/
2966 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2967        MatGetRow_MPIAIJ,
2968        MatRestoreRow_MPIAIJ,
2969        MatMult_MPIAIJ,
2970 /* 4*/ MatMultAdd_MPIAIJ,
2971        MatMultTranspose_MPIAIJ,
2972        MatMultTransposeAdd_MPIAIJ,
2973 #ifdef PETSC_HAVE_PBGL
2974        MatSolve_MPIAIJ,
2975 #else
2976        0,
2977 #endif
2978        0,
2979        0,
2980 /*10*/ 0,
2981        0,
2982        0,
2983        MatSOR_MPIAIJ,
2984        MatTranspose_MPIAIJ,
2985 /*15*/ MatGetInfo_MPIAIJ,
2986        MatEqual_MPIAIJ,
2987        MatGetDiagonal_MPIAIJ,
2988        MatDiagonalScale_MPIAIJ,
2989        MatNorm_MPIAIJ,
2990 /*20*/ MatAssemblyBegin_MPIAIJ,
2991        MatAssemblyEnd_MPIAIJ,
2992        MatSetOption_MPIAIJ,
2993        MatZeroEntries_MPIAIJ,
2994 /*24*/ MatZeroRows_MPIAIJ,
2995        0,
2996 #ifdef PETSC_HAVE_PBGL
2997        0,
2998 #else
2999        0,
3000 #endif
3001        0,
3002        0,
3003 /*29*/ MatSetUp_MPIAIJ,
3004 #ifdef PETSC_HAVE_PBGL
3005        0,
3006 #else
3007        0,
3008 #endif
3009        0,
3010        0,
3011        0,
3012 /*34*/ MatDuplicate_MPIAIJ,
3013        0,
3014        0,
3015        0,
3016        0,
3017 /*39*/ MatAXPY_MPIAIJ,
3018        MatGetSubMatrices_MPIAIJ,
3019        MatIncreaseOverlap_MPIAIJ,
3020        MatGetValues_MPIAIJ,
3021        MatCopy_MPIAIJ,
3022 /*44*/ MatGetRowMax_MPIAIJ,
3023        MatScale_MPIAIJ,
3024        0,
3025        0,
3026        MatZeroRowsColumns_MPIAIJ,
3027 /*49*/ MatSetBlockSize_MPIAIJ,
3028        0,
3029        0,
3030        0,
3031        0,
3032 /*54*/ MatFDColoringCreate_MPIAIJ,
3033        0,
3034        MatSetUnfactored_MPIAIJ,
3035        MatPermute_MPIAIJ,
3036        0,
3037 /*59*/ MatGetSubMatrix_MPIAIJ,
3038        MatDestroy_MPIAIJ,
3039        MatView_MPIAIJ,
3040        0,
3041        0,
3042 /*64*/ 0,
3043        0,
3044        0,
3045        0,
3046        0,
3047 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3048        MatGetRowMinAbs_MPIAIJ,
3049        0,
3050        MatSetColoring_MPIAIJ,
3051 #if defined(PETSC_HAVE_ADIC)
3052        MatSetValuesAdic_MPIAIJ,
3053 #else
3054        0,
3055 #endif
3056        MatSetValuesAdifor_MPIAIJ,
3057 /*75*/ MatFDColoringApply_AIJ,
3058        0,
3059        0,
3060        0,
3061        0,
3062 /*80*/ 0,
3063        0,
3064        0,
3065 /*83*/ MatLoad_MPIAIJ,
3066        0,
3067        0,
3068        0,
3069        0,
3070        0,
3071 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3072        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3073        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3074        MatPtAP_Basic,
3075        MatPtAPSymbolic_MPIAIJ,
3076 /*94*/ MatPtAPNumeric_MPIAIJ,
3077        0,
3078        0,
3079        0,
3080        0,
3081 /*99*/ 0,
3082        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3083        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3084        MatConjugate_MPIAIJ,
3085        0,
3086 /*104*/MatSetValuesRow_MPIAIJ,
3087        MatRealPart_MPIAIJ,
3088        MatImaginaryPart_MPIAIJ,
3089        0,
3090        0,
3091 /*109*/0,
3092        MatGetRedundantMatrix_MPIAIJ,
3093        MatGetRowMin_MPIAIJ,
3094        0,
3095        0,
3096 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3097        0,
3098        0,
3099        0,
3100        0,
3101 /*119*/0,
3102        0,
3103        0,
3104        0,
3105        MatGetMultiProcBlock_MPIAIJ,
3106 /*124*/MatFindNonZeroRows_MPIAIJ,
3107        MatGetColumnNorms_MPIAIJ,
3108        MatInvertBlockDiagonal_MPIAIJ,
3109        0,
3110        MatGetSubMatricesParallel_MPIAIJ,
3111 /*129*/0,
3112        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3113        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3114        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3115        0,
3116 /*134*/0,
3117        0,
3118        0,
3119        0,
3120        0
3121 };
3122 
3123 /* ----------------------------------------------------------------------------------------*/
3124 
3125 EXTERN_C_BEGIN
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3128 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3129 {
3130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3131   PetscErrorCode ierr;
3132 
3133   PetscFunctionBegin;
3134   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3135   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3136   PetscFunctionReturn(0);
3137 }
3138 EXTERN_C_END
3139 
3140 EXTERN_C_BEGIN
3141 #undef __FUNCT__
3142 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3143 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3144 {
3145   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3146   PetscErrorCode ierr;
3147 
3148   PetscFunctionBegin;
3149   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3150   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3151   PetscFunctionReturn(0);
3152 }
3153 EXTERN_C_END
3154 
3155 EXTERN_C_BEGIN
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3158 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3159 {
3160   Mat_MPIAIJ     *b;
3161   PetscErrorCode ierr;
3162   PetscInt       i;
3163   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3164 
3165   PetscFunctionBegin;
3166   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3167   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3168   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3169   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3170   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3171   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3172 
3173   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3174   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3175   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3176   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3177   if (d_nnz) {
3178     for (i=0; i<B->rmap->n; i++) {
3179       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3180     }
3181   }
3182   if (o_nnz) {
3183     for (i=0; i<B->rmap->n; i++) {
3184       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3185     }
3186   }
3187   b = (Mat_MPIAIJ*)B->data;
3188 
3189   if (!B->preallocated) {
3190     /* Explicitly create 2 MATSEQAIJ matrices. */
3191     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3192     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3193     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3194     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3195     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3196     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3197     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3198     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3199   }
3200 
3201   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3202   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3203   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3204   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3205   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3206   B->preallocated = PETSC_TRUE;
3207   PetscFunctionReturn(0);
3208 }
3209 EXTERN_C_END
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3213 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3214 {
3215   Mat            mat;
3216   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3217   PetscErrorCode ierr;
3218 
3219   PetscFunctionBegin;
3220   *newmat       = 0;
3221   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3222   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3223   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3224   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3225   a    = (Mat_MPIAIJ*)mat->data;
3226 
3227   mat->factortype    = matin->factortype;
3228   mat->rmap->bs      = matin->rmap->bs;
3229   mat->assembled    = PETSC_TRUE;
3230   mat->insertmode   = NOT_SET_VALUES;
3231   mat->preallocated = PETSC_TRUE;
3232 
3233   a->size           = oldmat->size;
3234   a->rank           = oldmat->rank;
3235   a->donotstash     = oldmat->donotstash;
3236   a->roworiented    = oldmat->roworiented;
3237   a->rowindices     = 0;
3238   a->rowvalues      = 0;
3239   a->getrowactive   = PETSC_FALSE;
3240 
3241   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3242   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3243 
3244   if (oldmat->colmap) {
3245 #if defined (PETSC_USE_CTABLE)
3246     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3247 #else
3248     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3249     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3250     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3251 #endif
3252   } else a->colmap = 0;
3253   if (oldmat->garray) {
3254     PetscInt len;
3255     len  = oldmat->B->cmap->n;
3256     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3257     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3258     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3259   } else a->garray = 0;
3260 
3261   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3262   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3263   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3264   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3265   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3266   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3267   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3268   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3269   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3270   *newmat = mat;
3271   PetscFunctionReturn(0);
3272 }
3273 
3274 
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatLoad_MPIAIJ"
3278 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3279 {
3280   PetscScalar    *vals,*svals;
3281   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3282   PetscErrorCode ierr;
3283   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3284   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3285   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3286   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3287   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3288   int            fd;
3289 
3290   PetscFunctionBegin;
3291   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3292   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3293   if (!rank) {
3294     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3295     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3296     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3297   }
3298 
3299   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3300 
3301   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3302   M = header[1]; N = header[2];
3303   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3304   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3305   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3306 
3307   /* If global sizes are set, check if they are consistent with that given in the file */
3308   if (sizesset) {
3309     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3310   }
3311   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3312   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3313 
3314   /* determine ownership of all rows */
3315   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3316   else m = newMat->rmap->n; /* Set by user */
3317 
3318   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3319   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3320 
3321   /* First process needs enough room for process with most rows */
3322   if (!rank) {
3323     mmax       = rowners[1];
3324     for (i=2; i<=size; i++) {
3325       mmax = PetscMax(mmax,rowners[i]);
3326     }
3327   } else mmax = m;
3328 
3329   rowners[0] = 0;
3330   for (i=2; i<=size; i++) {
3331     rowners[i] += rowners[i-1];
3332   }
3333   rstart = rowners[rank];
3334   rend   = rowners[rank+1];
3335 
3336   /* distribute row lengths to all processors */
3337   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3338   if (!rank) {
3339     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3340     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3341     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3342     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3343     for (j=0; j<m; j++) {
3344       procsnz[0] += ourlens[j];
3345     }
3346     for (i=1; i<size; i++) {
3347       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3348       /* calculate the number of nonzeros on each processor */
3349       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3350         procsnz[i] += rowlengths[j];
3351       }
3352       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3353     }
3354     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3355   } else {
3356     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3357   }
3358 
3359   if (!rank) {
3360     /* determine max buffer needed and allocate it */
3361     maxnz = 0;
3362     for (i=0; i<size; i++) {
3363       maxnz = PetscMax(maxnz,procsnz[i]);
3364     }
3365     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3366 
3367     /* read in my part of the matrix column indices  */
3368     nz   = procsnz[0];
3369     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3370     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3371 
3372     /* read in everyone else's and ship off */
3373     for (i=1; i<size; i++) {
3374       nz     = procsnz[i];
3375       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3376       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3377     }
3378     ierr = PetscFree(cols);CHKERRQ(ierr);
3379   } else {
3380     /* determine buffer space needed for message */
3381     nz = 0;
3382     for (i=0; i<m; i++) {
3383       nz += ourlens[i];
3384     }
3385     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3386 
3387     /* receive message of column indices*/
3388     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3389   }
3390 
3391   /* determine column ownership if matrix is not square */
3392   if (N != M) {
3393     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3394     else n = newMat->cmap->n;
3395     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3396     cstart = cend - n;
3397   } else {
3398     cstart = rstart;
3399     cend   = rend;
3400     n      = cend - cstart;
3401   }
3402 
3403   /* loop over local rows, determining number of off diagonal entries */
3404   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3405   jj = 0;
3406   for (i=0; i<m; i++) {
3407     for (j=0; j<ourlens[i]; j++) {
3408       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3409       jj++;
3410     }
3411   }
3412 
3413   for (i=0; i<m; i++) {
3414     ourlens[i] -= offlens[i];
3415   }
3416   if (!sizesset) {
3417     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3418   }
3419   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3420 
3421   for (i=0; i<m; i++) {
3422     ourlens[i] += offlens[i];
3423   }
3424 
3425   if (!rank) {
3426     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3427 
3428     /* read in my part of the matrix numerical values  */
3429     nz   = procsnz[0];
3430     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3431 
3432     /* insert into matrix */
3433     jj      = rstart;
3434     smycols = mycols;
3435     svals   = vals;
3436     for (i=0; i<m; i++) {
3437       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3438       smycols += ourlens[i];
3439       svals   += ourlens[i];
3440       jj++;
3441     }
3442 
3443     /* read in other processors and ship out */
3444     for (i=1; i<size; i++) {
3445       nz     = procsnz[i];
3446       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3447       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3448     }
3449     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3450   } else {
3451     /* receive numeric values */
3452     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3453 
3454     /* receive message of values*/
3455     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3456 
3457     /* insert into matrix */
3458     jj      = rstart;
3459     smycols = mycols;
3460     svals   = vals;
3461     for (i=0; i<m; i++) {
3462       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3463       smycols += ourlens[i];
3464       svals   += ourlens[i];
3465       jj++;
3466     }
3467   }
3468   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3469   ierr = PetscFree(vals);CHKERRQ(ierr);
3470   ierr = PetscFree(mycols);CHKERRQ(ierr);
3471   ierr = PetscFree(rowners);CHKERRQ(ierr);
3472 
3473   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3474   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3475   PetscFunctionReturn(0);
3476 }
3477 
3478 #undef __FUNCT__
3479 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3480 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3481 {
3482   PetscErrorCode ierr;
3483   IS             iscol_local;
3484   PetscInt       csize;
3485 
3486   PetscFunctionBegin;
3487   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3488   if (call == MAT_REUSE_MATRIX) {
3489     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3490     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3491   } else {
3492     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3493   }
3494   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3495   if (call == MAT_INITIAL_MATRIX) {
3496     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3497     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3498   }
3499   PetscFunctionReturn(0);
3500 }
3501 
3502 #undef __FUNCT__
3503 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3504 /*
3505     Not great since it makes two copies of the submatrix: first a SeqAIJ
3506   locally, and then the end result by concatenating the local matrices.
3507   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3508 
3509   Note: This requires a sequential iscol with all indices.
3510 */
3511 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3512 {
3513   PetscErrorCode ierr;
3514   PetscMPIInt    rank,size;
3515   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3516   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3517   Mat            *local,M,Mreuse;
3518   MatScalar      *vwork,*aa;
3519   MPI_Comm       comm = ((PetscObject)mat)->comm;
3520   Mat_SeqAIJ     *aij;
3521 
3522 
3523   PetscFunctionBegin;
3524   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3525   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3526 
3527   if (call ==  MAT_REUSE_MATRIX) {
3528     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3529     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3530     local = &Mreuse;
3531     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3532   } else {
3533     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3534     Mreuse = *local;
3535     ierr   = PetscFree(local);CHKERRQ(ierr);
3536   }
3537 
3538   /*
3539       m - number of local rows
3540       n - number of columns (same on all processors)
3541       rstart - first row in new global matrix generated
3542   */
3543   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3544   if (call == MAT_INITIAL_MATRIX) {
3545     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3546     ii  = aij->i;
3547     jj  = aij->j;
3548 
3549     /*
3550         Determine the number of non-zeros in the diagonal and off-diagonal
3551         portions of the matrix in order to do correct preallocation
3552     */
3553 
3554     /* first get start and end of "diagonal" columns */
3555     if (csize == PETSC_DECIDE) {
3556       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3557       if (mglobal == n) { /* square matrix */
3558         nlocal = m;
3559       } else {
3560         nlocal = n/size + ((n % size) > rank);
3561       }
3562     } else {
3563       nlocal = csize;
3564     }
3565     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3566     rstart = rend - nlocal;
3567     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3568 
3569     /* next, compute all the lengths */
3570     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3571     olens = dlens + m;
3572     for (i=0; i<m; i++) {
3573       jend = ii[i+1] - ii[i];
3574       olen = 0;
3575       dlen = 0;
3576       for (j=0; j<jend; j++) {
3577         if (*jj < rstart || *jj >= rend) olen++;
3578         else dlen++;
3579         jj++;
3580       }
3581       olens[i] = olen;
3582       dlens[i] = dlen;
3583     }
3584     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3585     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3586     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3587     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3588     ierr = PetscFree(dlens);CHKERRQ(ierr);
3589   } else {
3590     PetscInt ml,nl;
3591 
3592     M = *newmat;
3593     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3594     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3595     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3596     /*
3597          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3598        rather than the slower MatSetValues().
3599     */
3600     M->was_assembled = PETSC_TRUE;
3601     M->assembled     = PETSC_FALSE;
3602   }
3603   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3604   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3605   ii  = aij->i;
3606   jj  = aij->j;
3607   aa  = aij->a;
3608   for (i=0; i<m; i++) {
3609     row   = rstart + i;
3610     nz    = ii[i+1] - ii[i];
3611     cwork = jj;     jj += nz;
3612     vwork = aa;     aa += nz;
3613     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3614   }
3615 
3616   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   *newmat = M;
3619 
3620   /* save submatrix used in processor for next request */
3621   if (call ==  MAT_INITIAL_MATRIX) {
3622     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3623     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3624   }
3625 
3626   PetscFunctionReturn(0);
3627 }
3628 
3629 EXTERN_C_BEGIN
3630 #undef __FUNCT__
3631 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3632 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3633 {
3634   PetscInt       m,cstart, cend,j,nnz,i,d;
3635   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3636   const PetscInt *JJ;
3637   PetscScalar    *values;
3638   PetscErrorCode ierr;
3639 
3640   PetscFunctionBegin;
3641   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3642 
3643   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3644   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3647   m      = B->rmap->n;
3648   cstart = B->cmap->rstart;
3649   cend   = B->cmap->rend;
3650   rstart = B->rmap->rstart;
3651 
3652   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3653 
3654 #if defined(PETSC_USE_DEBUG)
3655   for (i=0; i<m; i++) {
3656     nnz     = Ii[i+1]- Ii[i];
3657     JJ      = J + Ii[i];
3658     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3659     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3660     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3661   }
3662 #endif
3663 
3664   for (i=0; i<m; i++) {
3665     nnz     = Ii[i+1]- Ii[i];
3666     JJ      = J + Ii[i];
3667     nnz_max = PetscMax(nnz_max,nnz);
3668     d       = 0;
3669     for (j=0; j<nnz; j++) {
3670       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3671     }
3672     d_nnz[i] = d;
3673     o_nnz[i] = nnz - d;
3674   }
3675   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3676   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3677 
3678   if (v) values = (PetscScalar*)v;
3679   else {
3680     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3681     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3682   }
3683 
3684   for (i=0; i<m; i++) {
3685     ii   = i + rstart;
3686     nnz  = Ii[i+1]- Ii[i];
3687     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3688   }
3689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691 
3692   if (!v) {
3693     ierr = PetscFree(values);CHKERRQ(ierr);
3694   }
3695   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3696   PetscFunctionReturn(0);
3697 }
3698 EXTERN_C_END
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3702 /*@
3703    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704    (the default parallel PETSc format).
3705 
3706    Collective on MPI_Comm
3707 
3708    Input Parameters:
3709 +  B - the matrix
3710 .  i - the indices into j for the start of each local row (starts with zero)
3711 .  j - the column indices for each local row (starts with zero)
3712 -  v - optional values in the matrix
3713 
3714    Level: developer
3715 
3716    Notes:
3717        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3718      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3719      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3720 
3721        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3722 
3723        The format used for the sparse matrix input is equivalent to a
3724     row-major ordering, i.e., for the following matrix the expected input data is
3725     as shown:
3726 
3727         1 0 0
3728         2 0 3     P0
3729        -------
3730         4 5 6     P1
3731 
3732      Process0 [P0]: rows_owned=[0,1]
3733         i =  {0,1,3}  [size = nrow+1  = 2+1]
3734         j =  {0,0,2}  [size = nz = 3]
3735         v =  {1,2,3}  [size = nz = 3]
3736 
3737      Process1 [P1]: rows_owned=[2]
3738         i =  {0,3}    [size = nrow+1  = 1+1]
3739         j =  {0,1,2}  [size = nz = 3]
3740         v =  {4,5,6}  [size = nz = 3]
3741 
3742 .keywords: matrix, aij, compressed row, sparse, parallel
3743 
3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746 @*/
3747 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748 {
3749   PetscErrorCode ierr;
3750 
3751   PetscFunctionBegin;
3752   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
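/*
   Editorial sketch (not part of the original source): a minimal use of
   MatMPIAIJSetPreallocationCSR() for the 3x3 example in the manual page above, assuming
   exactly two MPI processes. The helper name and the #if 0 guard are editorial additions,
   so the snippet is excluded from normal builds.
*/
#if 0
static PetscErrorCode ExampleMPIAIJFromCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  /* local CSR data for [1 0 0; 2 0 3 | 4 5 6] split as rows {0,1} on rank 0 and row {2} on rank 1 */
  PetscInt       i0[] = {0,1,3},   j0[] = {0,0,2};
  PetscScalar    v0[] = {1.0,2.0,3.0};
  PetscInt       i1[] = {0,3},     j1[] = {0,1,2};
  PetscScalar    v1[] = {4.0,5.0,6.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* i[] is local and starts at 0 on every process; j[] holds global column indices */
  ierr = MatMPIAIJSetPreallocationCSR(*A,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif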
3755 
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3758 /*@C
3759    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760    (the default parallel PETSc format).  For good matrix assembly performance
3761    the user should preallocate the matrix storage by setting the parameters
3762    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3763    performance can be increased by more than a factor of 50.
3764 
3765    Collective on MPI_Comm
3766 
3767    Input Parameters:
3768 +  A - the matrix
3769 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3770            (same value is used for all local rows)
3771 .  d_nnz - array containing the number of nonzeros in the various rows of the
3772            DIAGONAL portion of the local submatrix (possibly different for each row)
3773            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774            The size of this array is equal to the number of local rows, i.e 'm'.
3775            For matrices that will be factored, you must leave room for (and set)
3776            the diagonal entry even if it is zero.
3777 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3778            submatrix (same value is used for all local rows).
3779 -  o_nnz - array containing the number of nonzeros in the various rows of the
3780            OFF-DIAGONAL portion of the local submatrix (possibly different for
3781            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3782            structure. The size of this array is equal to the number
3783            of local rows, i.e 'm'.
3784 
3785    If the *_nnz parameter is given then the *_nz parameter is ignored
3786 
3787    The AIJ format (also called the Yale sparse matrix format or
3788    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3789    storage.  The stored row and column indices begin with zero.
3790    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3791 
3792    The parallel matrix is partitioned such that the first m0 rows belong to
3793    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3794    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3795 
3796    The DIAGONAL portion of the local submatrix of a processor can be defined
3797    as the submatrix which is obtained by extracting the part corresponding to
3798    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3799    first row that belongs to the processor, r2 is the last row belonging to
3800    this processor, and c1-c2 is the range of indices of the local part of a
3801    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3802    common case of a square matrix, the row and column ranges are the same and
3803    the DIAGONAL part is also square. The remaining portion of the local
3804    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3805 
3806    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3807 
3808    You can call MatGetInfo() to get information on how effective the preallocation was;
3809    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3810    You can also run with the option -info and look for messages with the string
3811    malloc in them to see if additional memory allocation was needed.
3812 
3813    Example usage:
3814 
3815    Consider the following 8x8 matrix with 34 non-zero values, that is
3816    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3817    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3818    as follows:
3819 
3820 .vb
3821             1  2  0  |  0  3  0  |  0  4
3822     Proc0   0  5  6  |  7  0  0  |  8  0
3823             9  0 10  | 11  0  0  | 12  0
3824     -------------------------------------
3825            13  0 14  | 15 16 17  |  0  0
3826     Proc1   0 18  0  | 19 20 21  |  0  0
3827             0  0  0  | 22 23  0  | 24  0
3828     -------------------------------------
3829     Proc2  25 26 27  |  0  0 28  | 29  0
3830            30  0  0  | 31 32 33  |  0 34
3831 .ve
3832 
3833    This can be represented as a collection of submatrices as:
3834 
3835 .vb
3836       A B C
3837       D E F
3838       G H I
3839 .ve
3840 
3841    Where the submatrices A,B,C are owned by proc0, D,E,F are
3842    owned by proc1, G,H,I are owned by proc2.
3843 
3844    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3846    The 'M','N' parameters are 8,8, and have the same values on all procs.
3847 
3848    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3849    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3850    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3851    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3852    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3853    matrix, and [DF] as another SeqAIJ matrix.
3854 
3855    When d_nz, o_nz parameters are specified, d_nz storage elements are
3856    allocated for every row of the local diagonal submatrix, and o_nz
3857    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3858    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3859    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3860    In this case, the values of d_nz,o_nz are:
3861 .vb
3862      proc0 : dnz = 2, o_nz = 2
3863      proc1 : dnz = 3, o_nz = 2
3864      proc2 : dnz = 1, o_nz = 4
3865 .ve
3866    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3867    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3868    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3869    34 values.
3870 
3871    When d_nnz, o_nnz parameters are specified, the storage is specified
3872    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3873    In the above case the values for d_nnz,o_nnz are:
3874 .vb
3875      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3876      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3877      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3878 .ve
3879    Here the space allocated is sum of all the above values i.e 34, and
3880    hence pre-allocation is perfect.
3881 
3882    Level: intermediate
3883 
3884 .keywords: matrix, aij, compressed row, sparse, parallel
3885 
3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3887           MPIAIJ, MatGetInfo()
3888 @*/
3889 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3890 {
3891   PetscErrorCode ierr;
3892 
3893   PetscFunctionBegin;
3894   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3895   PetscValidType(B,1);
3896   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3897   PetscFunctionReturn(0);
3898 }
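/*
   Editorial sketch (not part of the original source): the per-row preallocation that the
   8x8 example in the manual page above prescribes for proc0 (rows 0-2). The helper name
   and the #if 0 guard are editorial additions, so the snippet is excluded from normal builds.
*/
#if 0
static PetscErrorCode ExamplePreallocateProc0(Mat A)
{
  PetscErrorCode ierr;
  /* rows 0-2 of the example: 2 nonzeros each in the 3x3 DIAGONAL block and 2 each outside it */
  PetscInt       d_nnz[3] = {2,2,2};
  PetscInt       o_nnz[3] = {2,2,2};

  PetscFunctionBegin;
  /* when the per-row arrays are given, the scalar d_nz/o_nz arguments are ignored */
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif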
3899 
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3902 /*@
3903      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3904          CSR format the local rows.
3905 
3906    Collective on MPI_Comm
3907 
3908    Input Parameters:
3909 +  comm - MPI communicator
3910 .  m - number of local rows (Cannot be PETSC_DECIDE)
3911 .  n - This value should be the same as the local size used in creating the
3912        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3913        calculated if N is given) For square matrices n is almost always m.
3914 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3915 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3916 .   i - row indices
3917 .   j - column indices
3918 -   a - matrix values
3919 
3920    Output Parameter:
3921 .   mat - the matrix
3922 
3923    Level: intermediate
3924 
3925    Notes:
3926        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3927      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3928      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3929 
3930        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3931 
3932        The format used for the sparse matrix input is equivalent to a
3933     row-major ordering, i.e., for the following matrix the expected input data is
3934     as shown:
3935 
3936         1 0 0
3937         2 0 3     P0
3938        -------
3939         4 5 6     P1
3940 
3941      Process0 [P0]: rows_owned=[0,1]
3942         i =  {0,1,3}  [size = nrow+1  = 2+1]
3943         j =  {0,0,2}  [size = nz = 3]
3944         v =  {1,2,3}  [size = nz = 3]
3945 
3946      Process1 [P1]: rows_owned=[2]
3947         i =  {0,3}    [size = nrow+1  = 1+1]
3948         j =  {0,1,2}  [size = nz = 3]
3949         v =  {4,5,6}  [size = nz = 3]
3950 
3951 .keywords: matrix, aij, compressed row, sparse, parallel
3952 
3953 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3954           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3955 @*/
3956 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3957 {
3958   PetscErrorCode ierr;
3959 
3960   PetscFunctionBegin;
3961   if (i[0]) {
3962     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3963   }
3964   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3965   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3966   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3967   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3968   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3969   PetscFunctionReturn(0);
3970 }
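/*
   Editorial sketch (not part of the original source): building the local CSR triplet of a
   global 1-D Laplacian and handing it to MatCreateMPIAIJWithArrays(). The helper name and the
   #if 0 guard are editorial additions, so the snippet is excluded from normal builds.
*/
#if 0
static PetscErrorCode ExampleLaplacianWithArrays(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,M,r,nz = 0,*i,*j;
  PetscScalar    *a;

  PetscFunctionBegin;
  /* contiguous row partitioning: this process owns global rows rstart..rend-1 */
  ierr   = MPI_Scan(&mlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart = rend - mlocal;
  ierr   = MPI_Allreduce(&mlocal,&M,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);

  ierr = PetscMalloc((mlocal+1)*sizeof(PetscInt),&i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*mlocal+1)*sizeof(PetscInt),&j);CHKERRQ(ierr);
  ierr = PetscMalloc((3*mlocal+1)*sizeof(PetscScalar),&a);CHKERRQ(ierr);
  i[0] = 0;
  for (r=rstart; r<rend; r++) {
    if (r > 0)   {j[nz] = r-1; a[nz++] = -1.0;}
    j[nz] = r; a[nz++] = 2.0;
    if (r < M-1) {j[nz] = r+1; a[nz++] = -1.0;}
    i[r-rstart+1] = nz;  /* i[] is local and 0-based; j[] holds global column indices */
  }
  ierr = MatCreateMPIAIJWithArrays(comm,mlocal,PETSC_DECIDE,M,M,i,j,a,A);CHKERRQ(ierr);
  /* the arrays are copied by MatCreateMPIAIJWithArrays(), so they may be freed immediately */
  ierr = PetscFree(i);CHKERRQ(ierr);
  ierr = PetscFree(j);CHKERRQ(ierr);
  ierr = PetscFree(a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif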
3971 
3972 #undef __FUNCT__
3973 #define __FUNCT__ "MatCreateMPIAIJ"
3974 /*@C
3975    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3976    (the default parallel PETSc format).  For good matrix assembly performance
3977    the user should preallocate the matrix storage by setting the parameters
3978    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3979    performance can be increased by more than a factor of 50.
3980 
3981    Collective on MPI_Comm
3982 
3983    Input Parameters:
3984 +  comm - MPI communicator
3985 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3986            This value should be the same as the local size used in creating the
3987            y vector for the matrix-vector product y = Ax.
3988 .  n - This value should be the same as the local size used in creating the
3989        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3990        calculated if N is given) For square matrices n is almost always m.
3991 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3992 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3993 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3994            (same value is used for all local rows)
3995 .  d_nnz - array containing the number of nonzeros in the various rows of the
3996            DIAGONAL portion of the local submatrix (possibly different for each row)
3997            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3998            The size of this array is equal to the number of local rows, i.e 'm'.
3999 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4000            submatrix (same value is used for all local rows).
4001 -  o_nnz - array containing the number of nonzeros in the various rows of the
4002            OFF-DIAGONAL portion of the local submatrix (possibly different for
4003            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4004            structure. The size of this array is equal to the number
4005            of local rows, i.e., 'm'.
4006 
4007    Output Parameter:
4008 .  A - the matrix
4009 
4010    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4011    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4012    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4013 
4014    Notes:
4015    If the *_nnz parameter is given then the *_nz parameter is ignored
4016 
4017    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4018    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4019    storage requirements for this matrix.
4020 
4021    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4022    processor then it must be used on all processors that share the object for
4023    that argument.
4024 
4025    The user MUST specify either the local or global matrix dimensions
4026    (possibly both).
4027 
4028    The parallel matrix is partitioned across processors such that the
4029    first m0 rows belong to process 0, the next m1 rows belong to
4030    process 1, the next m2 rows belong to process 2, etc., where
4031    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4032    values corresponding to an [m x N] submatrix.
4033 
4034    The columns are logically partitioned with the n0 columns belonging
4035    to the 0th partition, the next n1 columns belonging to the next
4036    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4037 
4038    The DIAGONAL portion of the local submatrix on any given processor
4039    is the submatrix corresponding to the rows and columns m,n
4040    owned by the given processor, i.e., the diagonal submatrix on
4041    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4042    etc. The remaining portion of the local submatrix [m x (N-n)]
4043    constitutes the OFF-DIAGONAL portion. The example below better
4044    illustrates this concept.
4045 
4046    For a square global matrix we define each processor's diagonal portion
4047    to be its local rows and the corresponding columns (a square submatrix);
4048    each processor's off-diagonal portion encompasses the remainder of the
4049    local matrix (a rectangular submatrix).
4050 
4051    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4052 
4053    When calling this routine with a single process communicator, a matrix of
4054    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4055    type of communicator, use the construction mechanism:
4056      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4057 
4058    By default, this format uses inodes (identical nodes) when possible.
4059    We search for consecutive rows with the same nonzero structure, thereby
4060    reusing matrix information to achieve increased efficiency.
4061 
4062    Options Database Keys:
4063 +  -mat_no_inode  - Do not use inodes
4064 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4065 -  -mat_aij_oneindex - Internally use indexing starting at 1
4066         rather than 0.  Note that when calling MatSetValues(),
4067         the user still MUST index entries starting at 0!
4068 
4069 
4070    Example usage:
4071 
4072    Consider the following 8x8 matrix with 34 non-zero values, that is
4073    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4074    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4075    as follows:
4076 
4077 .vb
4078             1  2  0  |  0  3  0  |  0  4
4079     Proc0   0  5  6  |  7  0  0  |  8  0
4080             9  0 10  | 11  0  0  | 12  0
4081     -------------------------------------
4082            13  0 14  | 15 16 17  |  0  0
4083     Proc1   0 18  0  | 19 20 21  |  0  0
4084             0  0  0  | 22 23  0  | 24  0
4085     -------------------------------------
4086     Proc2  25 26 27  |  0  0 28  | 29  0
4087            30  0  0  | 31 32 33  |  0 34
4088 .ve
4089 
4090    This can be represented as a collection of submatrices as:
4091 
4092 .vb
4093       A B C
4094       D E F
4095       G H I
4096 .ve
4097 
4098    Where the submatrices A,B,C are owned by proc0, D,E,F are
4099    owned by proc1, G,H,I are owned by proc2.
4100 
4101    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4102    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'M','N' parameters are 8,8, and have the same values on all procs.
4104 
4105    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4106    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4107    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4108    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4109    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4110    matrix, and [DF] as another SeqAIJ matrix.
4111 
4112    When d_nz, o_nz parameters are specified, d_nz storage elements are
4113    allocated for every row of the local diagonal submatrix, and o_nz
4114    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4115    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4116    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4117    In this case, the values of d_nz,o_nz are:
4118 .vb
4119      proc0 : d_nz = 2, o_nz = 2
4120      proc1 : d_nz = 3, o_nz = 2
4121      proc2 : d_nz = 1, o_nz = 4
4122 .ve
4123    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4124    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4125    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4126    34 values.
4127 
4128    When d_nnz, o_nnz parameters are specified, the storage is specified
4129    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4130    In the above case the values for d_nnz,o_nnz are:
4131 .vb
4132      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4133      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4134      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4135 .ve
4136    Here the space allocated is the sum of all the above values, i.e., 34, and
4137    hence the preallocation is perfect.
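
   As a minimal sketch (assuming 'comm' and 'ierr' are already declared), proc1 in the
   example above could supply its preallocation either through a direct call to this
   routine or through the recommended MatCreate()/MatSetType() paradigm:
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};  /* the proc1 values listed above */
     Mat      A;

     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);

     /* or, equivalently */
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve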
4138 
4139    Level: intermediate
4140 
4141 .keywords: matrix, aij, compressed row, sparse, parallel
4142 
4143 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4144           MPIAIJ, MatCreateMPIAIJWithArrays()
4145 @*/
4146 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4147 {
4148   PetscErrorCode ierr;
4149   PetscMPIInt    size;
4150 
4151   PetscFunctionBegin;
4152   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4153   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4155   if (size > 1) {
4156     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4157     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4158   } else {
4159     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4160     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4161   }
4162   PetscFunctionReturn(0);
4163 }
4164 
4165 #undef __FUNCT__
4166 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4167 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4168 {
4169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4170 
4171   PetscFunctionBegin;
4172   *Ad     = a->A;
4173   *Ao     = a->B;
4174   *colmap = a->garray;
4175   PetscFunctionReturn(0);
4176 }
4177 
4178 #undef __FUNCT__
4179 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4180 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4181 {
4182   PetscErrorCode ierr;
4183   PetscInt       i;
4184   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4185 
4186   PetscFunctionBegin;
4187   if (coloring->ctype == IS_COLORING_GLOBAL) {
4188     ISColoringValue *allcolors,*colors;
4189     ISColoring      ocoloring;
4190 
4191     /* set coloring for diagonal portion */
4192     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4193 
4194     /* set coloring for off-diagonal portion */
4195     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4196     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4197     for (i=0; i<a->B->cmap->n; i++) {
4198       colors[i] = allcolors[a->garray[i]];
4199     }
4200     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4201     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4202     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4203     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4204   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4205     ISColoringValue *colors;
4206     PetscInt        *larray;
4207     ISColoring      ocoloring;
4208 
4209     /* set coloring for diagonal portion */
4210     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4211     for (i=0; i<a->A->cmap->n; i++) {
4212       larray[i] = i + A->cmap->rstart;
4213     }
4214     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4215     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4216     for (i=0; i<a->A->cmap->n; i++) {
4217       colors[i] = coloring->colors[larray[i]];
4218     }
4219     ierr = PetscFree(larray);CHKERRQ(ierr);
4220     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4221     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4222     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4223 
4224     /* set coloring for off-diagonal portion */
4225     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4226     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4227     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4228     for (i=0; i<a->B->cmap->n; i++) {
4229       colors[i] = coloring->colors[larray[i]];
4230     }
4231     ierr = PetscFree(larray);CHKERRQ(ierr);
4232     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4233     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4234     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4235   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4236 
4237   PetscFunctionReturn(0);
4238 }
4239 
4240 #if defined(PETSC_HAVE_ADIC)
4241 #undef __FUNCT__
4242 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4243 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4244 {
4245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4246   PetscErrorCode ierr;
4247 
4248   PetscFunctionBegin;
4249   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4250   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4251   PetscFunctionReturn(0);
4252 }
4253 #endif
4254 
4255 #undef __FUNCT__
4256 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4257 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4258 {
4259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4260   PetscErrorCode ierr;
4261 
4262   PetscFunctionBegin;
4263   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4264   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4265   PetscFunctionReturn(0);
4266 }
4267 
4268 #undef __FUNCT__
4269 #define __FUNCT__ "MatMergeSymbolic"
4270 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4271 {
4272   PetscErrorCode ierr;
4273   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4274   PetscInt       *indx;
4275 
4276   PetscFunctionBegin;
4277   /* This routine will ONLY return MPIAIJ type matrix */
4278   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4279   if (n == PETSC_DECIDE){
4280     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4281   }
4282   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4283   rstart -= m;
4284 
4285   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4286   for (i=0;i<m;i++) {
4287     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4288     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4289     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4290   }
4291 
4292   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4293   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4294   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4295   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4296   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4297   PetscFunctionReturn(0);
4298 }
4299 
4300 #undef __FUNCT__
4301 #define __FUNCT__ "MatMergeNumeric"
4302 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4303 {
4304   PetscErrorCode ierr;
4305   PetscInt       m,N,i,rstart,nnz,Ii;
4306   PetscInt       *indx;
4307   PetscScalar    *values;
4308 
4309   PetscFunctionBegin;
4310   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4311   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4312   for (i=0;i<m;i++) {
4313     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4314     Ii    = i + rstart;
4315     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4316     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4317   }
4318   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4319   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4320   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4321   PetscFunctionReturn(0);
4322 }
4323 
4324 #undef __FUNCT__
4325 #define __FUNCT__ "MatMerge"
4326 /*@
4327       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4328                  matrices from each processor
4329 
4330     Collective on MPI_Comm
4331 
4332    Input Parameters:
4333 +    comm - the communicator the parallel matrix will live on
4334 .    inmat - the input sequential matrix on each process
4335 .    n - number of local columns (or PETSC_DECIDE)
4336 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4337 
4338    Output Parameter:
4339 .    outmat - the parallel matrix generated
4340 
4341     Level: advanced
4342 
4343    Notes: The number of columns of the matrix in EACH processor MUST be the same.
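
   A minimal usage sketch (assuming each process has already assembled a SeqAIJ
   matrix Aloc with the same number of columns; the names are illustrative):
.vb
     Mat Aglobal;
     ierr = MatMerge(comm,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);CHKERRQ(ierr);
     /* note that the numeric phase destroys Aloc, see MatMergeNumeric() above */
.ve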
4344 
4345 @*/
4346 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4347 {
4348   PetscErrorCode ierr;
4349 
4350   PetscFunctionBegin;
4351   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4352   if (scall == MAT_INITIAL_MATRIX){
4353     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4354   }
4355   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4356   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4357   PetscFunctionReturn(0);
4358 }
4359 
4360 #undef __FUNCT__
4361 #define __FUNCT__ "MatFileSplit"
4362 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4363 {
4364   PetscErrorCode    ierr;
4365   PetscMPIInt       rank;
4366   PetscInt          m,N,i,rstart,nnz;
4367   size_t            len;
4368   const PetscInt    *indx;
4369   PetscViewer       out;
4370   char              *name;
4371   Mat               B;
4372   const PetscScalar *values;
4373 
4374   PetscFunctionBegin;
4375   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4376   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4377   /* Should this be the type of the diagonal block of A? */
4378   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4379   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4380   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4381   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4382   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4383   for (i=0;i<m;i++) {
4384     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4385     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4386     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4387   }
4388   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4389   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4390 
4391   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4392   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4393   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4394   sprintf(name,"%s.%d",outfile,rank);
4395   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4396   ierr = PetscFree(name);CHKERRQ(ierr);
4397   ierr = MatView(B,out);CHKERRQ(ierr);
4398   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4399   ierr = MatDestroy(&B);CHKERRQ(ierr);
4400   PetscFunctionReturn(0);
4401 }
4402 
4403 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4404 #undef __FUNCT__
4405 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4406 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4407 {
4408   PetscErrorCode       ierr;
4409   Mat_Merge_SeqsToMPI  *merge;
4410   PetscContainer       container;
4411 
4412   PetscFunctionBegin;
4413   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4414   if (container) {
4415     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4428     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4429     ierr = PetscFree(merge);CHKERRQ(ierr);
4430     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4431   }
4432   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #include <../src/mat/utils/freespace.h>
4437 #include <petscbt.h>
4438 
4439 #undef __FUNCT__
4440 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4441 /*@C
4442       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4443                  matrices from each processor
4444 
4445     Collective on MPI_Comm
4446 
4447    Input Parameters:
4448 +    comm - the communicator the parallel matrix will live on
4449 .    seqmat - the input sequential matrix on each process
4450 .    m - number of local rows (or PETSC_DECIDE)
4451 .    n - number of local columns (or PETSC_DECIDE)
4452 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4453 
4454    Output Parameter:
4455 .    mpimat - the parallel matrix generated
4456 
4457     Level: advanced
4458 
4459    Notes:
4460      The dimensions of the sequential matrix in each processor MUST be the same.
4461      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4462      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
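
     A minimal usage sketch (the sequential matrix seqmat here is illustrative):
.vb
     Mat Ampi;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     /* after the numerical values (but not the nonzero pattern) of seqmat change */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
.ve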
4463 @*/
4464 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4465 {
4466   PetscErrorCode       ierr;
4467   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4468   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4469   PetscMPIInt          size,rank,taga,*len_s;
4470   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4471   PetscInt             proc,m;
4472   PetscInt             **buf_ri,**buf_rj;
4473   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4474   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4475   MPI_Request          *s_waits,*r_waits;
4476   MPI_Status           *status;
4477   MatScalar            *aa=a->a;
4478   MatScalar            **abuf_r,*ba_i;
4479   Mat_Merge_SeqsToMPI  *merge;
4480   PetscContainer       container;
4481 
4482   PetscFunctionBegin;
4483   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4484 
4485   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4486   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4487 
4488   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4489   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4490 
4491   bi     = merge->bi;
4492   bj     = merge->bj;
4493   buf_ri = merge->buf_ri;
4494   buf_rj = merge->buf_rj;
4495 
4496   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4497   owners = merge->rowmap->range;
4498   len_s  = merge->len_s;
4499 
4500   /* send and recv matrix values */
4501   /*-----------------------------*/
4502   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4503   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4504 
4505   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4506   for (proc=0,k=0; proc<size; proc++){
4507     if (!len_s[proc]) continue;
4508     i = owners[proc];
4509     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4510     k++;
4511   }
4512 
4513   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4514   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4515   ierr = PetscFree(status);CHKERRQ(ierr);
4516 
4517   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4518   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4519 
4520   /* insert mat values of mpimat */
4521   /*----------------------------*/
4522   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4523   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4524 
4525   for (k=0; k<merge->nrecv; k++){
4526     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4527     nrows = *(buf_ri_k[k]);
4528     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4529     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4530   }
4531 
4532   /* set values of ba */
4533   m = merge->rowmap->n;
4534   for (i=0; i<m; i++) {
4535     arow = owners[rank] + i;
4536     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4537     bnzi = bi[i+1] - bi[i];
4538     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4539 
4540     /* add local non-zero vals of this proc's seqmat into ba */
4541     anzi = ai[arow+1] - ai[arow];
4542     aj   = a->j + ai[arow];
4543     aa   = a->a + ai[arow];
4544     nextaj = 0;
4545     for (j=0; nextaj<anzi; j++){
4546       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4547         ba_i[j] += aa[nextaj++];
4548       }
4549     }
4550 
4551     /* add received vals into ba */
4552     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4553       /* i-th row */
4554       if (i == *nextrow[k]) {
4555         anzi = *(nextai[k]+1) - *nextai[k];
4556         aj   = buf_rj[k] + *(nextai[k]);
4557         aa   = abuf_r[k] + *(nextai[k]);
4558         nextaj = 0;
4559         for (j=0; nextaj<anzi; j++){
4560           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4561             ba_i[j] += aa[nextaj++];
4562           }
4563         }
4564         nextrow[k]++; nextai[k]++;
4565       }
4566     }
4567     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4568   }
4569   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4570   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4571 
4572   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4573   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4574   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4575   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4576   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4577   PetscFunctionReturn(0);
4578 }
4579 
4580 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4581 
4582 #undef __FUNCT__
4583 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4584 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4585 {
4586   PetscErrorCode       ierr;
4587   Mat                  B_mpi;
4588   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4589   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4590   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4591   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4592   PetscInt             len,proc,*dnz,*onz;
4593   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4594   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4595   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4596   MPI_Status           *status;
4597   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4598   PetscBT              lnkbt;
4599   Mat_Merge_SeqsToMPI  *merge;
4600   PetscContainer       container;
4601 
4602   PetscFunctionBegin;
4603   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4604 
4605   /* make sure it is a PETSc comm */
4606   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4607   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4608   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4609 
4610   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4611   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4612 
4613   /* determine row ownership */
4614   /*---------------------------------------------------------*/
4615   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4616   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4617   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4618   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4619   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4620   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4621   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4622 
4623   m      = merge->rowmap->n;
4624   M      = merge->rowmap->N;
4625   owners = merge->rowmap->range;
4626 
4627   /* determine the number of messages to send, their lengths */
4628   /*---------------------------------------------------------*/
4629   len_s  = merge->len_s;
4630 
4631   len = 0;  /* length of buf_si[] */
4632   merge->nsend = 0;
4633   for (proc=0; proc<size; proc++){
4634     len_si[proc] = 0;
4635     if (proc == rank){
4636       len_s[proc] = 0;
4637     } else {
4638       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4639       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4640     }
4641     if (len_s[proc]) {
4642       merge->nsend++;
4643       nrows = 0;
4644       for (i=owners[proc]; i<owners[proc+1]; i++){
4645         if (ai[i+1] > ai[i]) nrows++;
4646       }
4647       len_si[proc] = 2*(nrows+1);
4648       len += len_si[proc];
4649     }
4650   }
4651 
4652   /* determine the number and length of messages to receive for ij-structure */
4653   /*-------------------------------------------------------------------------*/
4654   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4655   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4656 
4657   /* post the Irecv of j-structure */
4658   /*-------------------------------*/
4659   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4660   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4661 
4662   /* post the Isend of j-structure */
4663   /*--------------------------------*/
4664   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4665 
4666   for (proc=0, k=0; proc<size; proc++){
4667     if (!len_s[proc]) continue;
4668     i = owners[proc];
4669     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4670     k++;
4671   }
4672 
4673   /* receives and sends of j-structure are complete */
4674   /*------------------------------------------------*/
4675   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4676   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4677 
4678   /* send and recv i-structure */
4679   /*---------------------------*/
4680   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4681   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4682 
4683   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4684   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4685   for (proc=0,k=0; proc<size; proc++){
4686     if (!len_s[proc]) continue;
4687     /* form outgoing message for i-structure:
4688          buf_si[0]:                 nrows to be sent
4689                [1:nrows]:           row index (global)
4690                [nrows+1:2*nrows+1]: i-structure index
4691     */
4692     /*-------------------------------------------*/
4693     nrows = len_si[proc]/2 - 1;
4694     buf_si_i    = buf_si + nrows+1;
4695     buf_si[0]   = nrows;
4696     buf_si_i[0] = 0;
4697     nrows = 0;
4698     for (i=owners[proc]; i<owners[proc+1]; i++){
4699       anzi = ai[i+1] - ai[i];
4700       if (anzi) {
4701         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4702         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4703         nrows++;
4704       }
4705     }
4706     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4707     k++;
4708     buf_si += len_si[proc];
4709   }
4710 
4711   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4712   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4713 
4714   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4715   for (i=0; i<merge->nrecv; i++){
4716     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4717   }
4718 
4719   ierr = PetscFree(len_si);CHKERRQ(ierr);
4720   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4721   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4722   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4723   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4724   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4725   ierr = PetscFree(status);CHKERRQ(ierr);
4726 
4727   /* compute a local seq matrix in each processor */
4728   /*----------------------------------------------*/
4729   /* allocate bi array and free space for accumulating nonzero column info */
4730   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4731   bi[0] = 0;
4732 
4733   /* create and initialize a linked list */
4734   nlnk = N+1;
4735   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4736 
4737   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4738   len = 0;
4739   len  = ai[owners[rank+1]] - ai[owners[rank]];
4740   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4741   current_space = free_space;
4742 
4743   /* determine symbolic info for each local row */
4744   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4745 
4746   for (k=0; k<merge->nrecv; k++){
4747     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4748     nrows = *buf_ri_k[k];
4749     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4750     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4751   }
4752 
4753   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4754   len = 0;
4755   for (i=0;i<m;i++) {
4756     bnzi   = 0;
4757     /* add local non-zero cols of this proc's seqmat into lnk */
4758     arow   = owners[rank] + i;
4759     anzi   = ai[arow+1] - ai[arow];
4760     aj     = a->j + ai[arow];
4761     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4762     bnzi += nlnk;
4763     /* add received col data into lnk */
4764     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4765       if (i == *nextrow[k]) { /* i-th row */
4766         anzi = *(nextai[k]+1) - *nextai[k];
4767         aj   = buf_rj[k] + *nextai[k];
4768         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4769         bnzi += nlnk;
4770         nextrow[k]++; nextai[k]++;
4771       }
4772     }
4773     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4774 
4775     /* if free space is not available, make more free space */
4776     if (current_space->local_remaining<bnzi) {
4777       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4778       nspacedouble++;
4779     }
4780     /* copy data into free space, then initialize lnk */
4781     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4782     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4783 
4784     current_space->array           += bnzi;
4785     current_space->local_used      += bnzi;
4786     current_space->local_remaining -= bnzi;
4787 
4788     bi[i+1] = bi[i] + bnzi;
4789   }
4790 
4791   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4792 
4793   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4794   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4795   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4796 
4797   /* create symbolic parallel matrix B_mpi */
4798   /*---------------------------------------*/
4799   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4800   if (n==PETSC_DECIDE) {
4801     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4802   } else {
4803     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4804   }
4805   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4806   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4807   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4808 
4809   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4810   B_mpi->assembled     = PETSC_FALSE;
4811   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4812   merge->bi            = bi;
4813   merge->bj            = bj;
4814   merge->buf_ri        = buf_ri;
4815   merge->buf_rj        = buf_rj;
4816   merge->coi           = PETSC_NULL;
4817   merge->coj           = PETSC_NULL;
4818   merge->owners_co     = PETSC_NULL;
4819 
4820   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4821 
4822   /* attach the supporting struct to B_mpi for reuse */
4823   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4824   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4825   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4826   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4827   *mpimat = B_mpi;
4828 
4829   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4830   PetscFunctionReturn(0);
4831 }
4832 
4833 #undef __FUNCT__
4834 #define __FUNCT__ "MatMerge_SeqsToMPI"
4835 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4836 {
4837   PetscErrorCode   ierr;
4838 
4839   PetscFunctionBegin;
4840   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4841   if (scall == MAT_INITIAL_MATRIX){
4842     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4843   }
4844   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4845   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4846   PetscFunctionReturn(0);
4847 }
4848 
4849 #undef __FUNCT__
4850 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4851 /*@
4852      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4853           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4854           with MatGetSize()
4855 
4856     Not Collective
4857 
4858    Input Parameters:
4859 +    A - the matrix
4860 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4861 
4862    Output Parameter:
4863 .    A_loc - the local sequential matrix generated
4864 
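    A minimal usage sketch (names illustrative):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* if the values (but not the nonzero pattern) of A change, refresh Aloc with */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
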
4865     Level: developer
4866 
4867 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4868 
4869 @*/
4870 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4871 {
4872   PetscErrorCode  ierr;
4873   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4874   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4875   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4876   MatScalar       *aa=a->a,*ba=b->a,*cam;
4877   PetscScalar     *ca;
4878   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4879   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4880   PetscBool       match;
4881 
4882   PetscFunctionBegin;
4883   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4884   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4885   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4886   if (scall == MAT_INITIAL_MATRIX){
4887     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4888     ci[0] = 0;
4889     for (i=0; i<am; i++){
4890       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4891     }
4892     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4893     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4894     k = 0;
4895     for (i=0; i<am; i++) {
4896       ncols_o = bi[i+1] - bi[i];
4897       ncols_d = ai[i+1] - ai[i];
4898       /* off-diagonal portion of A */
4899       for (jo=0; jo<ncols_o; jo++) {
4900         col = cmap[*bj];
4901         if (col >= cstart) break;
4902         cj[k]   = col; bj++;
4903         ca[k++] = *ba++;
4904       }
4905       /* diagonal portion of A */
4906       for (j=0; j<ncols_d; j++) {
4907         cj[k]   = cstart + *aj++;
4908         ca[k++] = *aa++;
4909       }
4910       /* off-diagonal portion of A */
4911       for (j=jo; j<ncols_o; j++) {
4912         cj[k]   = cmap[*bj++];
4913         ca[k++] = *ba++;
4914       }
4915     }
4916     /* put together the new matrix */
4917     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4918     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4919     /* Since these are PETSc arrays, change flags to free them as necessary. */
4920     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4921     mat->free_a  = PETSC_TRUE;
4922     mat->free_ij = PETSC_TRUE;
4923     mat->nonew   = 0;
4924   } else if (scall == MAT_REUSE_MATRIX){
4925     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4926     ci = mat->i; cj = mat->j; cam = mat->a;
4927     for (i=0; i<am; i++) {
4928       /* off-diagonal portion of A */
4929       ncols_o = bi[i+1] - bi[i];
4930       for (jo=0; jo<ncols_o; jo++) {
4931         col = cmap[*bj];
4932         if (col >= cstart) break;
4933         *cam++ = *ba++; bj++;
4934       }
4935       /* diagonal portion of A */
4936       ncols_d = ai[i+1] - ai[i];
4937       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4938       /* off-diagonal portion of A */
4939       for (j=jo; j<ncols_o; j++) {
4940         *cam++ = *ba++; bj++;
4941       }
4942     }
4943   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4944   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4945   PetscFunctionReturn(0);
4946 }
4947 
4948 #undef __FUNCT__
4949 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4950 /*@C
4951      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4952 
4953     Not Collective
4954 
4955    Input Parameters:
4956 +    A - the matrix
4957 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4959 
4960    Output Parameter:
4961 .    A_loc - the local sequential matrix generated
4962 
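    A minimal usage sketch, letting the routine choose the rows and nonzero
    columns itself (names illustrative):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve
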
4963     Level: developer
4964 
4965 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4966 
4967 @*/
4968 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4969 {
4970   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4971   PetscErrorCode    ierr;
4972   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4973   IS                isrowa,iscola;
4974   Mat               *aloc;
4975   PetscBool       match;
4976 
4977   PetscFunctionBegin;
4978   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4979   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4980   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4981   if (!row){
4982     start = A->rmap->rstart; end = A->rmap->rend;
4983     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4984   } else {
4985     isrowa = *row;
4986   }
4987   if (!col){
4988     start = A->cmap->rstart;
4989     cmap  = a->garray;
4990     nzA   = a->A->cmap->n;
4991     nzB   = a->B->cmap->n;
4992     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4993     ncols = 0;
4994     for (i=0; i<nzB; i++) {
4995       if (cmap[i] < start) idx[ncols++] = cmap[i];
4996       else break;
4997     }
4998     imark = i;
4999     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5000     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5001     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5002   } else {
5003     iscola = *col;
5004   }
5005   if (scall != MAT_INITIAL_MATRIX){
5006     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5007     aloc[0] = *A_loc;
5008   }
5009   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5010   *A_loc = aloc[0];
5011   ierr = PetscFree(aloc);CHKERRQ(ierr);
5012   if (!row){
5013     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5014   }
5015   if (!col){
5016     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5017   }
5018   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatGetBrowsOfAcols"
5024 /*@C
5025     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5026 
5027     Collective on Mat
5028 
5029    Input Parameters:
5030 +    A,B - the matrices in mpiaij format
5031 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5032 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5033 
5034    Output Parameter:
5035 +    rowb, colb - index sets of rows and columns of B to extract
5036 -    B_seq - the sequential matrix generated
5037 
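    A minimal usage sketch; the index sets created by the first call are passed back
    unchanged when reusing the result (names illustrative):
.vb
     IS  rowb,colb;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* the values of B change but its nonzero pattern does not */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
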
5038     Level: developer
5039 
5040 @*/
5041 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5042 {
5043   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5044   PetscErrorCode    ierr;
5045   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5046   IS                isrowb,iscolb;
5047   Mat               *bseq=PETSC_NULL;
5048 
5049   PetscFunctionBegin;
5050   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5051     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5052   }
5053   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5054 
5055   if (scall == MAT_INITIAL_MATRIX){
5056     start = A->cmap->rstart;
5057     cmap  = a->garray;
5058     nzA   = a->A->cmap->n;
5059     nzB   = a->B->cmap->n;
5060     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5061     ncols = 0;
5062     for (i=0; i<nzB; i++) {  /* row < local row index */
5063       if (cmap[i] < start) idx[ncols++] = cmap[i];
5064       else break;
5065     }
5066     imark = i;
5067     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5068     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5069     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5070     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5071   } else {
5072     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5073     isrowb = *rowb; iscolb = *colb;
5074     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5075     bseq[0] = *B_seq;
5076   }
5077   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5078   *B_seq = bseq[0];
5079   ierr = PetscFree(bseq);CHKERRQ(ierr);
5080   if (!rowb){
5081     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5082   } else {
5083     *rowb = isrowb;
5084   }
5085   if (!colb){
5086     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5087   } else {
5088     *colb = iscolb;
5089   }
5090   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 #undef __FUNCT__
5095 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5096 /*
5097     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5098     of the OFF-DIAGONAL portion of local A
5099 
5100     Collective on Mat
5101 
5102    Input Parameters:
5103 +    A,B - the matrices in mpiaij format
5104 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5105 
5106    Output Parameter:
5107 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5108 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5109 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5110 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5111 
5112     Level: developer
5113 
5114 */
5115 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5116 {
5117   VecScatter_MPI_General *gen_to,*gen_from;
5118   PetscErrorCode         ierr;
5119   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5120   Mat_SeqAIJ             *b_oth;
5121   VecScatter             ctx=a->Mvctx;
5122   MPI_Comm               comm=((PetscObject)ctx)->comm;
5123   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5124   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5125   PetscScalar            *rvalues,*svalues;
5126   MatScalar              *b_otha,*bufa,*bufA;
5127   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5128   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5129   MPI_Status             *sstatus,rstatus;
5130   PetscMPIInt            jj;
5131   PetscInt               *cols,sbs,rbs;
5132   PetscScalar            *vals;
5133 
5134   PetscFunctionBegin;
5135   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5136     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5137   }
5138   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5139   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5140 
5141   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5142   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5143   rvalues  = gen_from->values; /* holds the length of receiving row */
5144   svalues  = gen_to->values;   /* holds the length of sending row */
5145   nrecvs   = gen_from->n;
5146   nsends   = gen_to->n;
5147 
5148   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5149   srow     = gen_to->indices;   /* local row index to be sent */
5150   sstarts  = gen_to->starts;
5151   sprocs   = gen_to->procs;
5152   sstatus  = gen_to->sstatus;
5153   sbs      = gen_to->bs;
5154   rstarts  = gen_from->starts;
5155   rprocs   = gen_from->procs;
5156   rbs      = gen_from->bs;
5157 
5158   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5159   if (scall == MAT_INITIAL_MATRIX){
5160     /* i-array */
5161     /*---------*/
5162     /*  post receives */
5163     for (i=0; i<nrecvs; i++){
5164       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5165       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5166       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5167     }
5168 
5169     /* pack the outgoing message */
5170     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5171     sstartsj[0] = 0;  rstartsj[0] = 0;
5172     len = 0; /* total length of j or a array to be sent */
5173     k = 0;
5174     for (i=0; i<nsends; i++){
5175       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5176       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5177       for (j=0; j<nrows; j++) {
5178         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5179         for (l=0; l<sbs; l++){
5180           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5181           rowlen[j*sbs+l] = ncols;
5182           len += ncols;
5183           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5184         }
5185         k++;
5186       }
5187       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5188       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5189     }
5190     /* recvs and sends of i-array are completed */
5191     i = nrecvs;
5192     while (i--) {
5193       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5194     }
5195     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5196 
5197     /* allocate buffers for sending j and a arrays */
5198     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5199     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5200 
5201     /* create i-array of B_oth */
5202     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5203     b_othi[0] = 0;
5204     len = 0; /* total length of j or a array to be received */
5205     k = 0;
5206     for (i=0; i<nrecvs; i++){
5207       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5208       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5209       for (j=0; j<nrows; j++) {
5210         b_othi[k+1] = b_othi[k] + rowlen[j];
5211         len += rowlen[j]; k++;
5212       }
5213       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5214     }
5215 
5216     /* allocate space for j and a arrays of B_oth */
5217     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5218     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5219 
5220     /* j-array */
5221     /*---------*/
5222     /*  post receives of j-array */
5223     for (i=0; i<nrecvs; i++){
5224       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5225       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5226     }
5227 
5228     /* pack the outgoing message j-array */
5229     k = 0;
5230     for (i=0; i<nsends; i++){
5231       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5232       bufJ = bufj+sstartsj[i];
5233       for (j=0; j<nrows; j++) {
5234         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5235         for (ll=0; ll<sbs; ll++){
5236           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5237           for (l=0; l<ncols; l++){
5238             *bufJ++ = cols[l];
5239           }
5240           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5241         }
5242       }
5243       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5244     }
5245 
5246     /* recvs and sends of j-array are completed */
5247     i = nrecvs;
5248     while (i--) {
5249       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5250     }
5251     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5252   } else if (scall == MAT_REUSE_MATRIX){
5253     sstartsj = *startsj_s;
5254     rstartsj = *startsj_r;
5255     bufa     = *bufa_ptr;
5256     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5257     b_otha   = b_oth->a;
5258   } else {
5259     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5260   }
5261 
5262   /* a-array */
5263   /*---------*/
5264   /*  post receives of a-array */
5265   for (i=0; i<nrecvs; i++){
5266     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5267     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5268   }
5269 
5270   /* pack the outgoing message a-array */
5271   k = 0;
5272   for (i=0; i<nsends; i++){
5273     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5274     bufA = bufa+sstartsj[i];
5275     for (j=0; j<nrows; j++) {
5276       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5277       for (ll=0; ll<sbs; ll++){
5278         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5279         for (l=0; l<ncols; l++){
5280           *bufA++ = vals[l];
5281         }
5282         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5283       }
5284     }
5285     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5286   }
5287   /* recvs and sends of a-array are completed */
5288   i = nrecvs;
5289   while (i--) {
5290     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5291   }
5292   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5293   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5294 
5295   if (scall == MAT_INITIAL_MATRIX){
5296     /* put together the new matrix */
5297     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5298 
5299     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5300     /* Since these are PETSc arrays, change flags to free them as necessary. */
5301     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5302     b_oth->free_a  = PETSC_TRUE;
5303     b_oth->free_ij = PETSC_TRUE;
5304     b_oth->nonew   = 0;
5305 
5306     ierr = PetscFree(bufj);CHKERRQ(ierr);
5307     if (!startsj_s || !bufa_ptr){
5308       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5309       ierr = PetscFree(bufa);CHKERRQ(ierr);
5310     } else {
5311       *startsj_s = sstartsj;
5312       *startsj_r = rstartsj;
5313       *bufa_ptr  = bufa;
5314     }
5315   }
5316   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5317   PetscFunctionReturn(0);
5318 }
5319 
5320 #undef __FUNCT__
5321 #define __FUNCT__ "MatGetCommunicationStructs"
5322 /*@C
5323   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5324 
5325   Not Collective
5326 
5327   Input Parameter:
5328 . A - The matrix in mpiaij format
5329 
5330   Output Parameters:
5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5332 . colmap - A map from global column index to local index into lvec
5333 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5334 
5335   Level: developer
5336 
5337 @*/
5338 #if defined (PETSC_USE_CTABLE)
5339 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5340 #else
5341 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5342 #endif
5343 {
5344   Mat_MPIAIJ *a;
5345 
5346   PetscFunctionBegin;
5347   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5348   PetscValidPointer(lvec, 2);
5349   PetscValidPointer(colmap, 3);
5350   PetscValidPointer(multScatter, 4);
5351   a = (Mat_MPIAIJ *) A->data;
5352   if (lvec) *lvec = a->lvec;
5353   if (colmap) *colmap = a->colmap;
5354   if (multScatter) *multScatter = a->Mvctx;
5355   PetscFunctionReturn(0);
5356 }
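/*
   A minimal calling sketch for MatGetCommunicationStructs() (illustrative; A is assumed to be
   an assembled MATMPIAIJ matrix and ierr a PetscErrorCode).  The returned objects are owned by
   the matrix, so the caller must not destroy them.

     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/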
5357 
5358 EXTERN_C_BEGIN
5359 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5360 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5361 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5362 EXTERN_C_END
5363 
5364 #undef __FUNCT__
5365 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5366 /*
5367     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5368 
5369                n                       p                          p
5370         (              )       (              )         (                  )
5371       m (      A       )  *  n (       B      )   =   m (         C        )
5372         (              )       (              )         (                  )
5373 
5374 */
5375 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5376 {
5377   PetscErrorCode     ierr;
5378   Mat                At,Bt,Ct;
5379 
5380   PetscFunctionBegin;
5381   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5382   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5383   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5384   ierr = MatDestroy(&At);CHKERRQ(ierr);
5385   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5386   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5387   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5388   PetscFunctionReturn(0);
5389 }
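/*
   In the sequence above At = A' (n x m), Bt = B' (p x n), and Ct = Bt*At (p x m); transposing
   Ct back into C yields the m x p product C = A*B shown in the picture above.
*/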
5390 
5391 #undef __FUNCT__
5392 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5393 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5394 {
5395   PetscErrorCode ierr;
5396   PetscInt       m=A->rmap->n,n=B->cmap->n;
5397   Mat            Cmat;
5398 
5399   PetscFunctionBegin;
5400   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5401   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5402   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5403   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5404   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5405   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5406   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5407   *C   = Cmat;
5408   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5409   PetscFunctionReturn(0);
5410 }
5411 
5412 /* ----------------------------------------------------------------*/
5413 #undef __FUNCT__
5414 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5415 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5416 {
5417   PetscErrorCode ierr;
5418 
5419   PetscFunctionBegin;
5420   if (scall == MAT_INITIAL_MATRIX){
5421     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5422   }
5423   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5424   PetscFunctionReturn(0);
5425 }
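/*
   Calling sketch (illustrative; A and B are assumed to be assembled MPIDENSE and MPIAIJ
   matrices with A->cmap->n == B->rmap->n, and ierr a PetscErrorCode).  A user-level
   MatMatMult() call with these types is intended to reach the routine above through the
   "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ() below.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/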
5426 
5427 EXTERN_C_BEGIN
5428 #if defined(PETSC_HAVE_MUMPS)
5429 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5430 #endif
5431 #if defined(PETSC_HAVE_PASTIX)
5432 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5433 #endif
5434 #if defined(PETSC_HAVE_SUPERLU_DIST)
5435 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5436 #endif
5437 #if defined(PETSC_HAVE_SPOOLES)
5438 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5439 #endif
5440 EXTERN_C_END
5441 
5442 /*MC
5443    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5444 
5445    Options Database Keys:
5446 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5447 
5448   Level: beginner
5449 
5450 .seealso: MatCreateMPIAIJ()
5451 M*/
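/*
   A minimal creation sketch for a MATMPIAIJ matrix (illustrative; M, N and the preallocation
   values are user-chosen placeholders, and ierr is a PetscErrorCode):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/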
5452 
5453 EXTERN_C_BEGIN
5454 #undef __FUNCT__
5455 #define __FUNCT__ "MatCreate_MPIAIJ"
5456 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5457 {
5458   Mat_MPIAIJ     *b;
5459   PetscErrorCode ierr;
5460   PetscMPIInt    size;
5461 
5462   PetscFunctionBegin;
5463   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5464 
5465   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5466   B->data         = (void*)b;
5467   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5468   B->rmap->bs     = 1;
5469   B->assembled    = PETSC_FALSE;
5470 
5471   B->insertmode   = NOT_SET_VALUES;
5472   b->size         = size;
5473   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5474 
5475   /* build cache for off array entries formed */
5476   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5477   b->donotstash  = PETSC_FALSE;
5478   b->colmap      = 0;
5479   b->garray      = 0;
5480   b->roworiented = PETSC_TRUE;
5481 
5482   /* stuff used for matrix vector multiply */
5483   b->lvec      = PETSC_NULL;
5484   b->Mvctx     = PETSC_NULL;
5485 
5486   /* stuff for MatGetRow() */
5487   b->rowindices   = 0;
5488   b->rowvalues    = 0;
5489   b->getrowactive = PETSC_FALSE;
5490 
5491 #if defined(PETSC_HAVE_SPOOLES)
5492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5493                                      "MatGetFactor_mpiaij_spooles",
5494                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5495 #endif
5496 #if defined(PETSC_HAVE_MUMPS)
5497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5498                                      "MatGetFactor_aij_mumps",
5499                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5500 #endif
5501 #if defined(PETSC_HAVE_PASTIX)
5502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5503 					   "MatGetFactor_mpiaij_pastix",
5504 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5505 #endif
5506 #if defined(PETSC_HAVE_SUPERLU_DIST)
5507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5508                                      "MatGetFactor_mpiaij_superlu_dist",
5509                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5510 #endif
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5512                                      "MatStoreValues_MPIAIJ",
5513                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5515                                      "MatRetrieveValues_MPIAIJ",
5516                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5518 				     "MatGetDiagonalBlock_MPIAIJ",
5519                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5521 				     "MatIsTranspose_MPIAIJ",
5522 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5524 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5525 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5527 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5528 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5530 				     "MatDiagonalScaleLocal_MPIAIJ",
5531 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5533                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5534                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5536                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5537                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5539                                      "MatConvert_MPIAIJ_MPISBAIJ",
5540                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5542                                      "MatMatMult_MPIDense_MPIAIJ",
5543                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5545                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5546                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5548                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5549                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5550   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5551   PetscFunctionReturn(0);
5552 }
5553 EXTERN_C_END
5554 
5555 #undef __FUNCT__
5556 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5557 /*@
5558      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5559          and "off-diagonal" parts of the matrix in CSR format.
5560 
5561    Collective on MPI_Comm
5562 
5563    Input Parameters:
5564 +  comm - MPI communicator
5565 .  m - number of local rows (cannot be PETSC_DECIDE)
5566 .  n - This value should be the same as the local size used in creating the
5567        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5568        it calculated if N is given). For square matrices n is almost always m.
5569 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5570 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5571 .   i - row indices for "diagonal" portion of matrix
5572 .   j - column indices for "diagonal" portion of matrix
5573 .   a - matrix values for "diagonal" portion of matrix
5574 .   oi - row indices for "off-diagonal" portion of matrix
5575 .   oj - column indices for "off-diagonal" portion of matrix
5576 -   oa - matrix values for "off-diagonal" portion of matrix
5577 
5578    Output Parameter:
5579 .   mat - the matrix
5580 
5581    Level: advanced
5582 
5583    Notes:
5584        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5585        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5586        must free the arrays once the matrix has been destroyed, and not before.
5587        The i and j indices are 0 based
5588 
5589        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5590 
5591        This sets local rows and cannot be used to set off-processor values.
5592 
5593        You cannot later use MatSetValues() to change values in this matrix.
5594 
5595 .keywords: matrix, aij, compressed row, sparse, parallel
5596 
5597 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5598           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5599 @*/
5600 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5601 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5602 {
5603   PetscErrorCode ierr;
5604   Mat_MPIAIJ     *maij;
5605 
5606   PetscFunctionBegin;
5607   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5608   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5609   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5610   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5611   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5612   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5613   maij = (Mat_MPIAIJ*) (*mat)->data;
5614   maij->donotstash     = PETSC_TRUE;
5615   (*mat)->preallocated = PETSC_TRUE;
5616 
5617   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5618   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5619   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5620   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5621 
5622   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5623   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5624 
5625   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629 
5630   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5631   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5632   PetscFunctionReturn(0);
5633 }
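/*
   Calling sketch for MatCreateMPIAIJWithSplitArrays() (illustrative; the six CSR arrays and the
   local sizes m,n are assumed to have been built by the caller as described in the manual page
   above, and ierr is a PetscErrorCode).  The arrays must remain valid until the matrix is
   destroyed.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/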
5634 
5635 /*
5636     Special version for direct calls from Fortran
5637 */
5638 #include <private/fortranimpl.h>
5639 
5640 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5641 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5642 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5643 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5644 #endif
5645 
5646 /* Change these macros so they can be used in a void function */
5647 #undef CHKERRQ
5648 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5649 #undef SETERRQ2
5650 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5651 #undef SETERRQ3
5652 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5653 #undef SETERRQ
5654 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
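/* The Fortran-callable wrapper below returns void, so errors cannot be propagated through a
   PetscErrorCode return value; the redefinitions above make the error checks abort instead. */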
5655 
5656 EXTERN_C_BEGIN
5657 #undef __FUNCT__
5658 #define __FUNCT__ "matsetvaluesmpiaij_"
5659 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5660 {
5661   Mat             mat = *mmat;
5662   PetscInt        m = *mm, n = *mn;
5663   InsertMode      addv = *maddv;
5664   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5665   PetscScalar     value;
5666   PetscErrorCode  ierr;
5667 
5668   MatCheckPreallocated(mat,1);
5669   if (mat->insertmode == NOT_SET_VALUES) {
5670     mat->insertmode = addv;
5671   }
5672 #if defined(PETSC_USE_DEBUG)
5673   else if (mat->insertmode != addv) {
5674     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5675   }
5676 #endif
5677   {
5678   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5679   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5680   PetscBool       roworiented = aij->roworiented;
5681 
5682   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros below */
5683   Mat             A = aij->A;
5684   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5685   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5686   MatScalar       *aa = a->a;
5687   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5688   Mat             B = aij->B;
5689   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5690   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5691   MatScalar       *ba = b->a;
5692 
5693   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5694   PetscInt        nonew = a->nonew;
5695   MatScalar       *ap1,*ap2;
5696 
5697   PetscFunctionBegin;
5698   for (i=0; i<m; i++) {
5699     if (im[i] < 0) continue;
5700 #if defined(PETSC_USE_DEBUG)
5701     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5702 #endif
5703     if (im[i] >= rstart && im[i] < rend) {
5704       row      = im[i] - rstart;
5705       lastcol1 = -1;
5706       rp1      = aj + ai[row];
5707       ap1      = aa + ai[row];
5708       rmax1    = aimax[row];
5709       nrow1    = ailen[row];
5710       low1     = 0;
5711       high1    = nrow1;
5712       lastcol2 = -1;
5713       rp2      = bj + bi[row];
5714       ap2      = ba + bi[row];
5715       rmax2    = bimax[row];
5716       nrow2    = bilen[row];
5717       low2     = 0;
5718       high2    = nrow2;
5719 
5720       for (j=0; j<n; j++) {
5721         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5722         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5723         if (in[j] >= cstart && in[j] < cend){
5724           col = in[j] - cstart;
5725           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5726         } else if (in[j] < 0) continue;
5727 #if defined(PETSC_USE_DEBUG)
5728         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5729 #endif
5730         else {
5731           if (mat->was_assembled) {
5732             if (!aij->colmap) {
5733               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5734             }
5735 #if defined (PETSC_USE_CTABLE)
5736             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5737 	    col--;
5738 #else
5739             col = aij->colmap[in[j]] - 1;
5740 #endif
5741             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5742               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5743               col =  in[j];
5744               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5745               B = aij->B;
5746               b = (Mat_SeqAIJ*)B->data;
5747               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5748               rp2      = bj + bi[row];
5749               ap2      = ba + bi[row];
5750               rmax2    = bimax[row];
5751               nrow2    = bilen[row];
5752               low2     = 0;
5753               high2    = nrow2;
5754               bm       = aij->B->rmap->n;
5755               ba = b->a;
5756             }
5757           } else col = in[j];
5758           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5759         }
5760       }
5761     } else {
5762       if (!aij->donotstash) {
5763         if (roworiented) {
5764           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5765         } else {
5766           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5767         }
5768       }
5769     }
5770   }}
5771   PetscFunctionReturnVoid();
5772 }
5773 EXTERN_C_END
5774 
5775