xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 24afaf4d43a57db9cf54f8efd3ed5d085f9ec091)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL.  This matrix type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
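/*
   Example (illustrative sketch only, not part of the original source; the sizes and
   nonzero estimates below are assumptions): create an AIJ matrix that works on any
   number of processes by calling both preallocation routines, as recommended above.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);                 takes effect on one process
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);    takes effect on several processes
*/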
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
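/*
   Example (illustrative sketch only, not part of the original source): select the aijcrl
   format from the options database.  The matrix is created generically and the type is
   chosen at runtime with -mat_type aijcrl; preallocation is the same as for MATAIJ.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                                      run with -mat_type aijcrl
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/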
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
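/*
   Sketch (assumed usage, not from the original source) of how this routine is typically
   driven: a SeqAIJ matrix gmat assembled on process 0 is spread over comm with m local
   rows on each process, and later refilled with new numerical values.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ....
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/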
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal entries in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal entries in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash-table cost; without it, it is not scalable (each processor
330 has an order-N integer array) but is fast to access.
331 */
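/*
   Sketch of how the colmap built below is consulted elsewhere in this file (see
   MatSetValues_MPIAIJ()); gcol and lcol are hypothetical names for a global column
   number and its local off-diagonal index (negative if gcol is not present):

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/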
332 #undef __FUNCT__
333 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
334 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
342 #if defined (PETSC_USE_CTABLE)
343   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
344   for (i=0; i<n; i++){
345     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
346   }
347 #else
348   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
349   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
352 #endif
353   PetscFunctionReturn(0);
354 }
355 
356 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
357 { \
358     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
359     lastcol1 = col;\
360     while (high1-low1 > 5) { \
361       t = (low1+high1)/2; \
362       if (rp1[t] > col) high1 = t; \
363       else             low1  = t; \
364     } \
365       for (_i=low1; _i<high1; _i++) { \
366         if (rp1[_i] > col) break; \
367         if (rp1[_i] == col) { \
368           if (addv == ADD_VALUES) ap1[_i] += value;   \
369           else                    ap1[_i] = value; \
370           goto a_noinsert; \
371         } \
372       }  \
373       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
374       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
375       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
376       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
377       N = nrow1++ - 1; a->nz++; high1++; \
378       /* shift up all the later entries in this row */ \
379       for (ii=N; ii>=_i; ii--) { \
380         rp1[ii+1] = rp1[ii]; \
381         ap1[ii+1] = ap1[ii]; \
382       } \
383       rp1[_i] = col;  \
384       ap1[_i] = value;  \
385       a_noinsert: ; \
386       ailen[row] = nrow1; \
387 }
388 
389 
390 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
391 { \
392     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
393     lastcol2 = col;\
394     while (high2-low2 > 5) { \
395       t = (low2+high2)/2; \
396       if (rp2[t] > col) high2 = t; \
397       else             low2  = t; \
398     } \
399     for (_i=low2; _i<high2; _i++) {		\
400       if (rp2[_i] > col) break;			\
401       if (rp2[_i] == col) {			      \
402 	if (addv == ADD_VALUES) ap2[_i] += value;     \
403 	else                    ap2[_i] = value;      \
404 	goto b_noinsert;			      \
405       }						      \
406     }							      \
407     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
408     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
409     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
410     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
411     N = nrow2++ - 1; b->nz++; high2++;					\
412     /* shift up all the later entries in this row */			\
413     for (ii=N; ii>=_i; ii--) {						\
414       rp2[ii+1] = rp2[ii];						\
415       ap2[ii+1] = ap2[ii];						\
416     }									\
417     rp2[_i] = col;							\
418     ap2[_i] = value;							\
419     b_noinsert: ;								\
420     bilen[row] = nrow2;							\
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
425 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
426 {
427   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
428   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
429   PetscErrorCode ierr;
430   PetscInt       l,*garray = mat->garray,diag;
431 
432   PetscFunctionBegin;
433   /* code only works for square matrices A */
434 
435   /* find size of row to the left of the diagonal part */
436   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
437   row  = row - diag;
438   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
439     if (garray[b->j[b->i[row]+l]] > diag) break;
440   }
441   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
442 
443   /* diagonal part */
444   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
445 
446   /* right of diagonal part */
447   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValues_MPIAIJ"
453 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscScalar    value;
457   PetscErrorCode ierr;
458   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
459   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
460   PetscBool      roworiented = aij->roworiented;
461 
462   /* Some variables required in the macros */
463   Mat            A = aij->A;
464   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
465   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
466   MatScalar      *aa = a->a;
467   PetscBool      ignorezeroentries = a->ignorezeroentries;
468   Mat            B = aij->B;
469   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
470   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
471   MatScalar      *ba = b->a;
472 
473   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
474   PetscInt       nonew;
475   MatScalar      *ap1,*ap2;
476 
477   PetscFunctionBegin;
478   if (v) PetscValidScalarPointer(v,6);
479   for (i=0; i<m; i++) {
480     if (im[i] < 0) continue;
481 #if defined(PETSC_USE_DEBUG)
482     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
483 #endif
484     if (im[i] >= rstart && im[i] < rend) {
485       row      = im[i] - rstart;
486       lastcol1 = -1;
487       rp1      = aj + ai[row];
488       ap1      = aa + ai[row];
489       rmax1    = aimax[row];
490       nrow1    = ailen[row];
491       low1     = 0;
492       high1    = nrow1;
493       lastcol2 = -1;
494       rp2      = bj + bi[row];
495       ap2      = ba + bi[row];
496       rmax2    = bimax[row];
497       nrow2    = bilen[row];
498       low2     = 0;
499       high2    = nrow2;
500 
501       for (j=0; j<n; j++) {
502         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
503         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
504         if (in[j] >= cstart && in[j] < cend){
505           col = in[j] - cstart;
506           nonew = a->nonew;
507           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
508         } else if (in[j] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
511 #endif
512         else {
513           if (mat->was_assembled) {
514             if (!aij->colmap) {
515               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
516             }
517 #if defined (PETSC_USE_CTABLE)
518             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
519 	    col--;
520 #else
521             col = aij->colmap[in[j]] - 1;
522 #endif
523             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
524               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525               col =  in[j];
526               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
527               B = aij->B;
528               b = (Mat_SeqAIJ*)B->data;
529               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
530               rp2      = bj + bi[row];
531               ap2      = ba + bi[row];
532               rmax2    = bimax[row];
533               nrow2    = bilen[row];
534               low2     = 0;
535               high2    = nrow2;
536               bm       = aij->B->rmap->n;
537               ba = b->a;
538             }
539           } else col = in[j];
540           nonew = b->nonew;
541           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
542         }
543       }
544     } else {
545       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
546       if (!aij->donotstash) {
547         mat->assembled = PETSC_FALSE;
548         if (roworiented) {
549           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
550         } else {
551           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatGetValues_MPIAIJ"
561 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
562 {
563   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
564   PetscErrorCode ierr;
565   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
566   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
567 
568   PetscFunctionBegin;
569   for (i=0; i<m; i++) {
570     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
571     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
572     if (idxm[i] >= rstart && idxm[i] < rend) {
573       row = idxm[i] - rstart;
574       for (j=0; j<n; j++) {
575         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
576         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
577         if (idxn[j] >= cstart && idxn[j] < cend){
578           col = idxn[j] - cstart;
579           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
580         } else {
581           if (!aij->colmap) {
582             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
583           }
584 #if defined (PETSC_USE_CTABLE)
585           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
586           col --;
587 #else
588           col = aij->colmap[idxn[j]] - 1;
589 #endif
590           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
591           else {
592             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
593           }
594         }
595       }
596     } else {
597       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
598     }
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
607 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
608 {
609   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
610   PetscErrorCode ierr;
611   PetscInt       nstash,reallocs;
612   InsertMode     addv;
613 
614   PetscFunctionBegin;
615   if (aij->donotstash || mat->nooffprocentries) {
616     PetscFunctionReturn(0);
617   }
618 
619   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
620   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
621   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
622   mat->insertmode = addv; /* in case this processor had no cache */
623 
624   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
625   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
626   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
627   PetscFunctionReturn(0);
628 }
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
632 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
636   PetscErrorCode ierr;
637   PetscMPIInt    n;
638   PetscInt       i,j,rstart,ncols,flg;
639   PetscInt       *row,*col;
640   PetscBool      other_disassembled;
641   PetscScalar    *val;
642   InsertMode     addv = mat->insertmode;
643 
644   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
645   PetscFunctionBegin;
646   if (!aij->donotstash && !mat->nooffprocentries) {
647     while (1) {
648       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
649       if (!flg) break;
650 
651       for (i=0; i<n;) {
652         /* Now identify the consecutive vals belonging to the same row */
653         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
654         if (j < n) ncols = j-i;
655         else       ncols = n-i;
656         /* Now assemble all these values with a single function call */
657         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
658         i = j;
659       }
660     }
661     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
662   }
663   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
664   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
665 
666   /* determine if any processor has disassembled, if so we must
667      also disassemble ourselves, in order that we may reassemble. */
668   /*
669      if nonzero structure of submatrix B cannot change then we know that
670      no processor disassembled thus we can skip this stuff
671   */
672   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
673     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
674     if (mat->was_assembled && !other_disassembled) {
675       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
676     }
677   }
678   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
679     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
680   }
681   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
682   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
684   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
685 
686   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
687   aij->rowvalues = 0;
688 
689   /* used by MatAXPY() */
690   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
691   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
692 
693   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
694   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
700 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
701 {
702   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
703   PetscErrorCode ierr;
704 
705   PetscFunctionBegin;
706   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
707   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatZeroRows_MPIAIJ"
713 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
714 {
715   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode    ierr;
717   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
718   PetscInt          i,*owners = A->rmap->range;
719   PetscInt          *nprocs,j,idx,nsends,row;
720   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
721   PetscInt          *rvalues,count,base,slen,*source;
722   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
723   MPI_Comm          comm = ((PetscObject)A)->comm;
724   MPI_Request       *send_waits,*recv_waits;
725   MPI_Status        recv_status,*send_status;
726   const PetscScalar *xx;
727   PetscScalar       *bb;
728 #if defined(PETSC_DEBUG)
729   PetscBool      found = PETSC_FALSE;
730 #endif
731 
732   PetscFunctionBegin;
733   /*  first count number of contributors to each processor */
734   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
735   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
736   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
737   j = 0;
738   for (i=0; i<N; i++) {
739     if (lastidx > (idx = rows[i])) j = 0;
740     lastidx = idx;
741     for (; j<size; j++) {
742       if (idx >= owners[j] && idx < owners[j+1]) {
743         nprocs[2*j]++;
744         nprocs[2*j+1] = 1;
745         owner[i] = j;
746 #if defined(PETSC_DEBUG)
747         found = PETSC_TRUE;
748 #endif
749         break;
750       }
751     }
752 #if defined(PETSC_DEBUG)
753     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
754     found = PETSC_FALSE;
755 #endif
756   }
757   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
758 
759   if (A->nooffproczerorows) {
760     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
761     nrecvs = nsends;
762     nmax   = N;
763   } else {
764     /* inform other processors of number of messages and max length*/
765     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
766   }
767 
768   /* post receives:   */
769   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
770   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
771   for (i=0; i<nrecvs; i++) {
772     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
773   }
774 
775   /* do sends:
776       1) starts[i] gives the starting index in svalues for stuff going to
777          the ith processor
778   */
779   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
780   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
781   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
782   starts[0] = 0;
783   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
784   for (i=0; i<N; i++) {
785     svalues[starts[owner[i]]++] = rows[i];
786   }
787 
788   starts[0] = 0;
789   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
790   count = 0;
791   for (i=0; i<size; i++) {
792     if (nprocs[2*i+1]) {
793       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
794     }
795   }
796   ierr = PetscFree(starts);CHKERRQ(ierr);
797 
798   base = owners[rank];
799 
800   /*  wait on receives */
801   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
802   count  = nrecvs; slen = 0;
803   while (count) {
804     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
805     /* unpack receives into our local space */
806     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
807     source[imdex]  = recv_status.MPI_SOURCE;
808     lens[imdex]    = n;
809     slen          += n;
810     count--;
811   }
812   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
813 
814   /* move the data into the send scatter */
815   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
816   count = 0;
817   for (i=0; i<nrecvs; i++) {
818     values = rvalues + i*nmax;
819     for (j=0; j<lens[i]; j++) {
820       lrows[count++] = values[j] - base;
821     }
822   }
823   ierr = PetscFree(rvalues);CHKERRQ(ierr);
824   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
825   ierr = PetscFree(owner);CHKERRQ(ierr);
826   ierr = PetscFree(nprocs);CHKERRQ(ierr);
827 
828   /* fix right hand side if needed */
829   if (x && b) {
830     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
831     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
832     for (i=0; i<slen; i++) {
833       bb[lrows[i]] = diag*xx[lrows[i]];
834     }
835     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
836     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
837   }
838   /*
839         Zero the required rows. If the "diagonal block" of the matrix
840      is square and the user wishes to set the diagonal, we use separate
841      code so that MatSetValues() is not called for each diagonal entry, which
842      would allocate new memory with many mallocs and slow things down.
843 
844   */
845   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
846   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
848     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
849   } else if (diag != 0.0) {
850     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
851     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
852       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
853 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
854     }
855     for (i = 0; i < slen; i++) {
856       row  = lrows[i] + rstart;
857       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
858     }
859     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861   } else {
862     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(lrows);CHKERRQ(ierr);
865 
866   /* wait on sends */
867   if (nsends) {
868     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
869     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
870     ierr = PetscFree(send_status);CHKERRQ(ierr);
871   }
872   ierr = PetscFree(send_waits);CHKERRQ(ierr);
873   ierr = PetscFree(svalues);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
879 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
880 {
881   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
882   PetscErrorCode    ierr;
883   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
884   PetscInt          i,*owners = A->rmap->range;
885   PetscInt          *nprocs,j,idx,nsends;
886   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
887   PetscInt          *rvalues,count,base,slen,*source;
888   PetscInt          *lens,*lrows,*values,m;
889   MPI_Comm          comm = ((PetscObject)A)->comm;
890   MPI_Request       *send_waits,*recv_waits;
891   MPI_Status        recv_status,*send_status;
892   const PetscScalar *xx;
893   PetscScalar       *bb,*mask;
894   Vec               xmask,lmask;
895   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
896   const PetscInt    *aj, *ii,*ridx;
897   PetscScalar       *aa;
898 #if defined(PETSC_DEBUG)
899   PetscBool         found = PETSC_FALSE;
900 #endif
901 
902   PetscFunctionBegin;
903   /*  first count number of contributors to each processor */
904   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
905   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
906   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
907   j = 0;
908   for (i=0; i<N; i++) {
909     if (lastidx > (idx = rows[i])) j = 0;
910     lastidx = idx;
911     for (; j<size; j++) {
912       if (idx >= owners[j] && idx < owners[j+1]) {
913         nprocs[2*j]++;
914         nprocs[2*j+1] = 1;
915         owner[i] = j;
916 #if defined(PETSC_DEBUG)
917         found = PETSC_TRUE;
918 #endif
919         break;
920       }
921     }
922 #if defined(PETSC_DEBUG)
923     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
924     found = PETSC_FALSE;
925 #endif
926   }
927   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
928 
929   /* inform other processors of number of messages and max length*/
930   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
931 
932   /* post receives:   */
933   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
934   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
935   for (i=0; i<nrecvs; i++) {
936     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
937   }
938 
939   /* do sends:
940       1) starts[i] gives the starting index in svalues for stuff going to
941          the ith processor
942   */
943   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
944   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
945   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
946   starts[0] = 0;
947   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
948   for (i=0; i<N; i++) {
949     svalues[starts[owner[i]]++] = rows[i];
950   }
951 
952   starts[0] = 0;
953   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
954   count = 0;
955   for (i=0; i<size; i++) {
956     if (nprocs[2*i+1]) {
957       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
958     }
959   }
960   ierr = PetscFree(starts);CHKERRQ(ierr);
961 
962   base = owners[rank];
963 
964   /*  wait on receives */
965   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
966   count  = nrecvs; slen = 0;
967   while (count) {
968     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
969     /* unpack receives into our local space */
970     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
971     source[imdex]  = recv_status.MPI_SOURCE;
972     lens[imdex]    = n;
973     slen          += n;
974     count--;
975   }
976   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
977 
978   /* move the data into the send scatter */
979   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
980   count = 0;
981   for (i=0; i<nrecvs; i++) {
982     values = rvalues + i*nmax;
983     for (j=0; j<lens[i]; j++) {
984       lrows[count++] = values[j] - base;
985     }
986   }
987   ierr = PetscFree(rvalues);CHKERRQ(ierr);
988   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
989   ierr = PetscFree(owner);CHKERRQ(ierr);
990   ierr = PetscFree(nprocs);CHKERRQ(ierr);
991   /* lrows are the local rows to be zeroed, slen is the number of local rows */
992 
993   /* zero diagonal part of matrix */
994   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
995 
996   /* handle off diagonal part of matrix */
997   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
998   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
999   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1000   for (i=0; i<slen; i++) {
1001     bb[lrows[i]] = 1;
1002   }
1003   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1004   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1007   if (x) {
1008     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1012   }
1013   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1014 
1015   /* remove zeroed rows of off diagonal matrix */
1016   ii = aij->i;
1017   for (i=0; i<slen; i++) {
1018     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1019   }
1020 
1021   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1022   if (aij->compressedrow.use){
1023     m    = aij->compressedrow.nrows;
1024     ii   = aij->compressedrow.i;
1025     ridx = aij->compressedrow.rindex;
1026     for (i=0; i<m; i++){
1027       n   = ii[i+1] - ii[i];
1028       aj  = aij->j + ii[i];
1029       aa  = aij->a + ii[i];
1030 
1031       for (j=0; j<n; j++) {
1032         if (PetscAbsScalar(mask[*aj])) {
1033           if (b) bb[*ridx] -= *aa*xx[*aj];
1034           *aa        = 0.0;
1035         }
1036         aa++;
1037         aj++;
1038       }
1039       ridx++;
1040     }
1041   } else { /* do not use compressed row format */
1042     m = l->B->rmap->n;
1043     for (i=0; i<m; i++) {
1044       n   = ii[i+1] - ii[i];
1045       aj  = aij->j + ii[i];
1046       aa  = aij->a + ii[i];
1047       for (j=0; j<n; j++) {
1048         if (PetscAbsScalar(mask[*aj])) {
1049           if (b) bb[i] -= *aa*xx[*aj];
1050           *aa    = 0.0;
1051         }
1052         aa++;
1053         aj++;
1054       }
1055     }
1056   }
1057   if (x) {
1058     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1059     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1060   }
1061   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1062   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1063   ierr = PetscFree(lrows);CHKERRQ(ierr);
1064 
1065   /* wait on sends */
1066   if (nsends) {
1067     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1068     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1069     ierr = PetscFree(send_status);CHKERRQ(ierr);
1070   }
1071   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1072   ierr = PetscFree(svalues);CHKERRQ(ierr);
1073 
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMult_MPIAIJ"
1079 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       nt;
1084 
1085   PetscFunctionBegin;
1086   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1087   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1088   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1091   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1097 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1109 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1124 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128   PetscBool      merged;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1132   /* do nondiagonal part */
1133   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1134   if (!merged) {
1135     /* send it on its way */
1136     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137     /* do local part */
1138     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1139     /* receive remote parts: note this assumes the values are not actually */
1140     /* added into yy until the next line */
1141     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   } else {
1143     /* do local part */
1144     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1145     /* send it on its way */
1146     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147     /* values actually were received in the Begin() but we need to call this nop */
1148     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 EXTERN_C_BEGIN
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1156 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1157 {
1158   MPI_Comm       comm;
1159   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1160   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1161   IS             Me,Notme;
1162   PetscErrorCode ierr;
1163   PetscInt       M,N,first,last,*notme,i;
1164   PetscMPIInt    size;
1165 
1166   PetscFunctionBegin;
1167 
1168   /* Easy test: the diagonal block of Bmat must be the transpose of the diagonal block of Amat */
1169   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1170   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1171   if (!*f) PetscFunctionReturn(0);
1172   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1174   if (size == 1) PetscFunctionReturn(0);
1175 
1176   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1177   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1178   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1179   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1180   for (i=0; i<first; i++) notme[i] = i;
1181   for (i=last; i<M; i++) notme[i-last+first] = i;
1182   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1183   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1184   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1185   Aoff = Aoffs[0];
1186   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1187   Boff = Boffs[0];
1188   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1189   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1191   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1193   ierr = PetscFree(notme);CHKERRQ(ierr);
1194    PetscFunctionReturn(0);
1195 }
1196 EXTERN_C_END
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1200 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   /* do nondiagonal part */
1207   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1208   /* send it on its way */
1209   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   /* do local part */
1211   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1212   /* receive remote parts */
1213   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /*
1218   This only works correctly for square matrices where the subblock A->A is the
1219    diagonal block
1220 */
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1223 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1224 {
1225   PetscErrorCode ierr;
1226   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1227 
1228   PetscFunctionBegin;
1229   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1230   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1231   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatScale_MPIAIJ"
1237 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1238 {
1239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1244   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatDestroy_MPIAIJ"
1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1251 {
1252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256 #if defined(PETSC_USE_LOG)
1257   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1258 #endif
1259   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1263 #if defined (PETSC_USE_CTABLE)
1264   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1265 #else
1266   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1267 #endif
1268   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1269   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1270   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1271   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1272   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1273   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1274 
1275   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1292   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1293   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1294   PetscErrorCode    ierr;
1295   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1296   int               fd;
1297   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1298   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1299   PetscScalar       *column_values;
1300   PetscInt          message_count,flowcontrolcount;
1301 
1302   PetscFunctionBegin;
1303   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1304   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1305   nz   = A->nz + B->nz;
1306   if (!rank) {
1307     header[0] = MAT_FILE_CLASSID;
1308     header[1] = mat->rmap->N;
1309     header[2] = mat->cmap->N;
1310     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1311     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1312     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     /* get largest number of rows any processor has */
1314     rlen = mat->rmap->n;
1315     range = mat->rmap->range;
1316     for (i=1; i<size; i++) {
1317       rlen = PetscMax(rlen,range[i+1] - range[i]);
1318     }
1319   } else {
1320     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1321     rlen = mat->rmap->n;
1322   }
1323 
1324   /* load up the local row counts */
1325   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1326   for (i=0; i<mat->rmap->n; i++) {
1327     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1328   }
1329 
1330   /* store the row lengths to the file */
1331   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1332   if (!rank) {
1333     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     for (i=1; i<size; i++) {
1335       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1336       rlen = range[i+1] - range[i];
1337       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1338       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1339     }
1340     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1341   } else {
1342     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1343     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1347 
1348   /* load up the local column indices */
1349   nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
1350   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1351   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1352   cnt  = 0;
1353   for (i=0; i<mat->rmap->n; i++) {
1354     for (j=B->i[i]; j<B->i[i+1]; j++) {
1355       if ( (col = garray[B->j[j]]) > cstart) break;
1356       column_indices[cnt++] = col;
1357     }
1358     for (k=A->i[i]; k<A->i[i+1]; k++) {
1359       column_indices[cnt++] = A->j[k] + cstart;
1360     }
1361     for (; j<B->i[i+1]; j++) {
1362       column_indices[cnt++] = garray[B->j[j]];
1363     }
1364   }
1365   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1366 
1367   /* store the column indices to the file */
1368   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1369   if (!rank) {
1370     MPI_Status status;
1371     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1372     for (i=1; i<size; i++) {
1373       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1374       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1375       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1376       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1377       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1378     }
1379     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1380   } else {
1381     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1382     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1385   }
1386   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1387 
1388   /* load up the local column values */
1389   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1390   cnt  = 0;
1391   for (i=0; i<mat->rmap->n; i++) {
1392     for (j=B->i[i]; j<B->i[i+1]; j++) {
1393       if ( garray[B->j[j]] > cstart) break;
1394       column_values[cnt++] = B->a[j];
1395     }
1396     for (k=A->i[i]; k<A->i[i+1]; k++) {
1397       column_values[cnt++] = A->a[k];
1398     }
1399     for (; j<B->i[i+1]; j++) {
1400       column_values[cnt++] = B->a[j];
1401     }
1402   }
1403   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1404 
1405   /* store the column values to the file */
1406   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1407   if (!rank) {
1408     MPI_Status status;
1409     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1410     for (i=1; i<size; i++) {
1411       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1412       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1413       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1414       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1415       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1416     }
1417     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1418   } else {
1419     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1420     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1421     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1423   }
1424   ierr = PetscFree(column_values);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
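
/*
   Usage sketch (not part of the PETSc source; error checking abbreviated): storing an
   MPIAIJ matrix with a binary viewer, which reaches MatView_MPIAIJ_Binary() above when
   the communicator has more than one process.  The file name "matrix.dat" is only an
   example.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/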
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1430 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1431 {
1432   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1433   PetscErrorCode    ierr;
1434   PetscMPIInt       rank = aij->rank,size = aij->size;
1435   PetscBool         isdraw,iascii,isbinary;
1436   PetscViewer       sviewer;
1437   PetscViewerFormat format;
1438 
1439   PetscFunctionBegin;
1440   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1441   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1442   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1443   if (iascii) {
1444     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1445     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1446       MatInfo    info;
1447       PetscBool  inodes;
1448 
1449       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1450       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1451       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1452       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1453       if (!inodes) {
1454         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1455 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1456       } else {
1457         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1458 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1459       }
1460       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1462       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1464       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1467       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1468       PetscFunctionReturn(0);
1469     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1470       PetscInt   inodecount,inodelimit,*inodes;
1471       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1472       if (inodes) {
1473         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1474       } else {
1475         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1476       }
1477       PetscFunctionReturn(0);
1478     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1479       PetscFunctionReturn(0);
1480     }
1481   } else if (isbinary) {
1482     if (size == 1) {
1483       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1484       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1485     } else {
1486       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1487     }
1488     PetscFunctionReturn(0);
1489   } else if (isdraw) {
1490     PetscDraw  draw;
1491     PetscBool  isnull;
1492     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1493     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1494   }
1495 
1496   if (size == 1) {
1497     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499   } else {
1500     /* assemble the entire matrix onto first processor. */
1501     Mat         A;
1502     Mat_SeqAIJ  *Aloc;
1503     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1504     MatScalar   *a;
1505 
1506     if (mat->rmap->N > 1024) {
1507       PetscBool  flg = PETSC_FALSE;
1508 
1509       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1510       if (!flg) {
1511         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1512       }
1513     }
1514 
1515     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1516     if (!rank) {
1517       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1518     } else {
1519       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1520     }
1521     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1522     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1523     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1524     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1525     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1526 
1527     /* copy over the A part */
1528     Aloc = (Mat_SeqAIJ*)aij->A->data;
1529     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1530     row = mat->rmap->rstart;
1531     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1532     for (i=0; i<m; i++) {
1533       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1534       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1535     }
1536     aj = Aloc->j;
1537     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1538 
1539     /* copy over the B part */
1540     Aloc = (Mat_SeqAIJ*)aij->B->data;
1541     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1544     ct   = cols;
1545     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1546     for (i=0; i<m; i++) {
1547       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1548       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1549     }
1550     ierr = PetscFree(ct);CHKERRQ(ierr);
1551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     /*
1554        Everyone has to call to draw the matrix since the graphics waits are
1555        synchronized across all processors that share the PetscDraw object
1556     */
1557     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1558     if (!rank) {
1559       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1560       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1561       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1562       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1563     }
1564     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1565     ierr = MatDestroy(&A);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatView_MPIAIJ"
1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1573 {
1574   PetscErrorCode ierr;
1575   PetscBool      iascii,isdraw,issocket,isbinary;
1576 
1577   PetscFunctionBegin;
1578   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1579   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1580   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1581   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1582   if (iascii || isdraw || isbinary || issocket) {
1583     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1584   } else {
1585     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1586   }
1587   PetscFunctionReturn(0);
1588 }
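
/*
   Usage sketch (illustrative only): requesting the per-process storage report produced by
   the PETSC_VIEWER_ASCII_INFO_DETAIL branch of MatView_MPIAIJ_ASCIIorDraworSocket() above.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/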
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSOR_MPIAIJ"
1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1593 {
1594   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1595   PetscErrorCode ierr;
1596   Vec            bb1 = 0;
1597   PetscBool      hasop;
1598 
1599   PetscFunctionBegin;
1600   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1601     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1602   }
1603 
1604   if (flag == SOR_APPLY_UPPER) {
1605     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1606     PetscFunctionReturn(0);
1607   }
1608 
1609   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1610     if (flag & SOR_ZERO_INITIAL_GUESS) {
1611       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1612       its--;
1613     }
1614 
1615     while (its--) {
1616       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618 
1619       /* update rhs: bb1 = bb - B*x */
1620       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1621       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1622 
1623       /* local sweep */
1624       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1625     }
1626   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1627     if (flag & SOR_ZERO_INITIAL_GUESS) {
1628       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1629       its--;
1630     }
1631     while (its--) {
1632       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634 
1635       /* update rhs: bb1 = bb - B*x */
1636       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1637       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1638 
1639       /* local sweep */
1640       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1641     }
1642   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1643     if (flag & SOR_ZERO_INITIAL_GUESS) {
1644       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1645       its--;
1646     }
1647     while (its--) {
1648       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1649       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650 
1651       /* update rhs: bb1 = bb - B*x */
1652       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1653       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1654 
1655       /* local sweep */
1656       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1657     }
1658   }  else if (flag & SOR_EISENSTAT) {
1659     Vec         xx1;
1660 
1661     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1662     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1663 
1664     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     if (!mat->diag) {
1667       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1668       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1669     }
1670     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1671     if (hasop) {
1672       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1673     } else {
1674       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1675     }
1676     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1677 
1678     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1679 
1680     /* local sweep */
1681     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1682     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1683     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1684   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1685 
1686   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
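
/*
   Usage sketch (illustrative only): one local symmetric SOR sweep applied through the
   public interface, which dispatches to MatSOR_MPIAIJ() above.  The vectors b and x are
   assumed compatible with mat; in practice this routine is normally reached through the
   PCSOR preconditioner (-pc_type sor) rather than called directly.

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/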
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatPermute_MPIAIJ"
1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1693 {
1694   MPI_Comm       comm;
1695   PetscInt       first,local_rowsize,local_colsize;
1696   const PetscInt *rows;
1697   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1698   PetscErrorCode ierr;
1699 
1700   PetscFunctionBegin;
1701   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1702   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1703   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1704   /* collect the global row permutation and invert it */
1705   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1706   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1707   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1708   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1709   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1710   /* get the local target indices */
1711   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1712   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1713   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1714   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1715   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1716   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1717   /* the column permutation is so much easier;
1718      make a local version of 'colp' and invert it */
1719   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1720   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1721   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1722   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1723   /* now we just get the submatrix */
1724   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1725   /* clean up */
1726   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1727   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
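
/*
   Usage sketch (illustrative only): permuting a parallel AIJ matrix.  Here rowp and colp
   are assumed to be index sets describing the row and column permutations; serial index
   sets are tolerated, as handled by the ISOnComm() call above.

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/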
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1734 {
1735   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1736   Mat            A = mat->A,B = mat->B;
1737   PetscErrorCode ierr;
1738   PetscReal      isend[5],irecv[5];
1739 
1740   PetscFunctionBegin;
1741   info->block_size     = 1.0;
1742   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1743   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1744   isend[3] = info->memory;  isend[4] = info->mallocs;
1745   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1746   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1747   isend[3] += info->memory;  isend[4] += info->mallocs;
1748   if (flag == MAT_LOCAL) {
1749     info->nz_used      = isend[0];
1750     info->nz_allocated = isend[1];
1751     info->nz_unneeded  = isend[2];
1752     info->memory       = isend[3];
1753     info->mallocs      = isend[4];
1754   } else if (flag == MAT_GLOBAL_MAX) {
1755     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1756     info->nz_used      = irecv[0];
1757     info->nz_allocated = irecv[1];
1758     info->nz_unneeded  = irecv[2];
1759     info->memory       = irecv[3];
1760     info->mallocs      = irecv[4];
1761   } else if (flag == MAT_GLOBAL_SUM) {
1762     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1763     info->nz_used      = irecv[0];
1764     info->nz_allocated = irecv[1];
1765     info->nz_unneeded  = irecv[2];
1766     info->memory       = irecv[3];
1767     info->mallocs      = irecv[4];
1768   }
1769   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1770   info->fill_ratio_needed = 0;
1771   info->factor_mallocs    = 0;
1772 
1773   PetscFunctionReturn(0);
1774 }
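
/*
   Usage sketch (illustrative only): querying the nonzero counts accumulated by
   MatGetInfo_MPIAIJ() above, summed over all processes.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)mat)->comm,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/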
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatSetOption_MPIAIJ"
1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1779 {
1780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1781   PetscErrorCode ierr;
1782 
1783   PetscFunctionBegin;
1784   switch (op) {
1785   case MAT_NEW_NONZERO_LOCATIONS:
1786   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1787   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1788   case MAT_KEEP_NONZERO_PATTERN:
1789   case MAT_NEW_NONZERO_LOCATION_ERR:
1790   case MAT_USE_INODES:
1791   case MAT_IGNORE_ZERO_ENTRIES:
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_ROW_ORIENTED:
1796     a->roworiented = flg;
1797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1798     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1799     break;
1800   case MAT_NEW_DIAGONALS:
1801     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1802     break;
1803   case MAT_IGNORE_OFF_PROC_ENTRIES:
1804     a->donotstash = flg;
1805     break;
1806   case MAT_SPD:
1807     A->spd_set                         = PETSC_TRUE;
1808     A->spd                             = flg;
1809     if (flg) {
1810       A->symmetric                     = PETSC_TRUE;
1811       A->structurally_symmetric        = PETSC_TRUE;
1812       A->symmetric_set                 = PETSC_TRUE;
1813       A->structurally_symmetric_set    = PETSC_TRUE;
1814     }
1815     break;
1816   case MAT_SYMMETRIC:
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_STRUCTURALLY_SYMMETRIC:
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     break;
1822   case MAT_HERMITIAN:
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_SYMMETRY_ETERNAL:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   default:
1829     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1830   }
1831   PetscFunctionReturn(0);
1832 }
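
/*
   Usage sketch (illustrative only): options commonly set on MPIAIJ matrices, all handled
   by the switch in MatSetOption_MPIAIJ() above.  MAT_IGNORE_OFF_PROC_ENTRIES drops
   off-process MatSetValues() entries, and MAT_SPD also marks the matrix symmetric and
   structurally symmetric, as coded above.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
*/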
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatGetRow_MPIAIJ"
1836 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1837 {
1838   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1839   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1840   PetscErrorCode ierr;
1841   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1842   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1843   PetscInt       *cmap,*idx_p;
1844 
1845   PetscFunctionBegin;
1846   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1847   mat->getrowactive = PETSC_TRUE;
1848 
1849   if (!mat->rowvalues && (idx || v)) {
1850     /*
1851         allocate enough space to hold information from the longest row.
1852     */
1853     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1854     PetscInt   max = 1,tmp;
1855     for (i=0; i<matin->rmap->n; i++) {
1856       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1857       if (max < tmp) { max = tmp; }
1858     }
1859     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1860   }
1861 
1862   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1863   lrow = row - rstart;
1864 
1865   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1866   if (!v)   {pvA = 0; pvB = 0;}
1867   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1868   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1869   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1870   nztot = nzA + nzB;
1871 
1872   cmap  = mat->garray;
1873   if (v  || idx) {
1874     if (nztot) {
1875       /* Sort by increasing column numbers, assuming A and B already sorted */
1876       PetscInt imark = -1;
1877       if (v) {
1878         *v = v_p = mat->rowvalues;
1879         for (i=0; i<nzB; i++) {
1880           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1881           else break;
1882         }
1883         imark = i;
1884         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1885         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1886       }
1887       if (idx) {
1888         *idx = idx_p = mat->rowindices;
1889         if (imark > -1) {
1890           for (i=0; i<imark; i++) {
1891             idx_p[i] = cmap[cworkB[i]];
1892           }
1893         } else {
1894           for (i=0; i<nzB; i++) {
1895             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1896             else break;
1897           }
1898           imark = i;
1899         }
1900         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1901         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1902       }
1903     } else {
1904       if (idx) *idx = 0;
1905       if (v)   *v   = 0;
1906     }
1907   }
1908   *nz = nztot;
1909   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1910   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1917 {
1918   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1919 
1920   PetscFunctionBegin;
1921   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1922   aij->getrowactive = PETSC_FALSE;
1923   PetscFunctionReturn(0);
1924 }
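
/*
   Usage sketch (illustrative only): looping over the locally owned rows with
   MatGetRow()/MatRestoreRow(), which call the two routines above for MPIAIJ matrices.
   Only locally owned rows may be requested, as enforced in MatGetRow_MPIAIJ().

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[], which are sorted by increasing global column ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/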
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatNorm_MPIAIJ"
1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1929 {
1930   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1931   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1932   PetscErrorCode ierr;
1933   PetscInt       i,j,cstart = mat->cmap->rstart;
1934   PetscReal      sum = 0.0;
1935   MatScalar      *v;
1936 
1937   PetscFunctionBegin;
1938   if (aij->size == 1) {
1939     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1940   } else {
1941     if (type == NORM_FROBENIUS) {
1942       v = amat->a;
1943       for (i=0; i<amat->nz; i++) {
1944 #if defined(PETSC_USE_COMPLEX)
1945         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1946 #else
1947         sum += (*v)*(*v); v++;
1948 #endif
1949       }
1950       v = bmat->a;
1951       for (i=0; i<bmat->nz; i++) {
1952 #if defined(PETSC_USE_COMPLEX)
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954 #else
1955         sum += (*v)*(*v); v++;
1956 #endif
1957       }
1958       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960     } else if (type == NORM_1) { /* max column norm */
1961       PetscReal *tmp,*tmp2;
1962       PetscInt  *jj,*garray = aij->garray;
1963       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1964       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1965       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981     } else if (type == NORM_INFINITY) { /* max row norm */
1982       PetscReal ntemp = 0.0;
1983       for (j=0; j<aij->A->rmap->n; j++) {
1984         v = amat->a + amat->i[j];
1985         sum = 0.0;
1986         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         v = bmat->a + bmat->i[j];
1990         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1991           sum += PetscAbsScalar(*v); v++;
1992         }
1993         if (sum > ntemp) ntemp = sum;
1994       }
1995       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1996     } else {
1997       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1998     }
1999   }
2000   PetscFunctionReturn(0);
2001 }
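
/*
   Usage sketch (illustrative only): the norms supported by MatNorm_MPIAIJ() above.
   Requesting NORM_2 raises an error, as coded at the end of that routine.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/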
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatTranspose_MPIAIJ"
2005 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2006 {
2007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2008   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2009   PetscErrorCode ierr;
2010   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2011   PetscInt       cstart=A->cmap->rstart,ncol;
2012   Mat            B;
2013   MatScalar      *array;
2014 
2015   PetscFunctionBegin;
2016   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2017 
2018   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2019   ai = Aloc->i; aj = Aloc->j;
2020   bi = Bloc->i; bj = Bloc->j;
2021   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2022     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2023     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2024     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2025     for (i=0; i<ai[ma]; i++){
2026       d_nnz[aj[i]] ++;
2027       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2028     }
2029 
2030     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2031     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2032     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2033     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2034     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2035     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2036   } else {
2037     B = *matout;
2038   }
2039 
2040   /* copy over the A part */
2041   array = Aloc->a;
2042   row = A->rmap->rstart;
2043   for (i=0; i<ma; i++) {
2044     ncol = ai[i+1]-ai[i];
2045     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2046     row++; array += ncol; aj += ncol;
2047   }
2048   aj = Aloc->j;
2049   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2050 
2051   /* copy over the B part */
2052   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2053   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2054   array = Bloc->a;
2055   row = A->rmap->rstart;
2056   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2057   cols_tmp = cols;
2058   for (i=0; i<mb; i++) {
2059     ncol = bi[i+1]-bi[i];
2060     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2061     row++; array += ncol; cols_tmp += ncol;
2062   }
2063   ierr = PetscFree(cols);CHKERRQ(ierr);
2064 
2065   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2067   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2068     *matout = B;
2069   } else {
2070     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
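
/*
   Usage sketch (illustrative only): out-of-place and in-place transposes, both handled by
   MatTranspose_MPIAIJ() above.  In-place reuse requires a square matrix, as checked there.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/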
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2078 {
2079   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2080   Mat            a = aij->A,b = aij->B;
2081   PetscErrorCode ierr;
2082   PetscInt       s1,s2,s3;
2083 
2084   PetscFunctionBegin;
2085   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2086   if (rr) {
2087     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2088     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2089     /* Overlap communication with computation. */
2090     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2091   }
2092   if (ll) {
2093     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2094     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2095     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2096   }
2097   /* scale  the diagonal block */
2098   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2099 
2100   if (rr) {
2101     /* Do a scatter end and then right scale the off-diagonal block */
2102     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2104   }
2105 
2106   PetscFunctionReturn(0);
2107 }
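
/*
   Usage sketch (illustrative only): forming diag(ll)*A*diag(rr).  The local sizes of ll
   and rr must match the local row and column sizes of A, as checked in
   MatDiagonalScale_MPIAIJ() above; MatGetVecs() produces conforming vectors.

     Vec ll,rr;
     ierr = MatGetVecs(A,&rr,&ll);CHKERRQ(ierr);
     ... fill ll and rr ...
     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/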
2108 
2109 #undef __FUNCT__
2110 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2111 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2112 {
2113   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatEqual_MPIAIJ"
2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2124 {
2125   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2126   Mat            a,b,c,d;
2127   PetscBool      flg;
2128   PetscErrorCode ierr;
2129 
2130   PetscFunctionBegin;
2131   a = matA->A; b = matA->B;
2132   c = matB->A; d = matB->B;
2133 
2134   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2135   if (flg) {
2136     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2137   }
2138   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatCopy_MPIAIJ"
2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2145 {
2146   PetscErrorCode ierr;
2147   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2148   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2149 
2150   PetscFunctionBegin;
2151   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2152   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2153     /* because of the column compression in the off-processor part of the matrix a->B,
2154        the number of columns in a->B and b->B may be different, hence we cannot call
2155        the MatCopy() directly on the two parts. If need be, we can provide a more
2156        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2157        then copying the submatrices */
2158     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2159   } else {
2160     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2161     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2162   }
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatSetUp_MPIAIJ"
2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2169 {
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2179 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2180 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2181 {
2182   PetscInt          i,m=Y->rmap->n;
2183   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2184   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2185   const PetscInt    *xi = x->i,*yi = y->i;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2191     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2192     nnz[i] = 0;
2193     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2194       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2195       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2196       nnz[i]++;
2197     }
2198     for (; k<nzy; k++) nnz[i]++;
2199   }
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatAXPY_MPIAIJ"
2205 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       i;
2209   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2210   PetscBLASInt   bnz,one=1;
2211   Mat_SeqAIJ     *x,*y;
2212 
2213   PetscFunctionBegin;
2214   if (str == SAME_NONZERO_PATTERN) {
2215     PetscScalar alpha = a;
2216     x = (Mat_SeqAIJ *)xx->A->data;
2217     y = (Mat_SeqAIJ *)yy->A->data;
2218     bnz = PetscBLASIntCast(x->nz);
2219     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2220     x = (Mat_SeqAIJ *)xx->B->data;
2221     y = (Mat_SeqAIJ *)yy->B->data;
2222     bnz = PetscBLASIntCast(x->nz);
2223     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224   } else if (str == SUBSET_NONZERO_PATTERN) {
2225     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2226 
2227     x = (Mat_SeqAIJ *)xx->B->data;
2228     y = (Mat_SeqAIJ *)yy->B->data;
2229     if (y->xtoy && y->XtoY != xx->B) {
2230       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2231       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2232     }
2233     if (!y->xtoy) { /* get xtoy */
2234       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2235       y->XtoY = xx->B;
2236       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2237     }
2238     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2239   } else {
2240     Mat B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2248     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2250     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2251     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2252     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2253     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
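
/*
   Usage sketch (illustrative only): Y = Y + a*X through the public interface, which
   dispatches to MatAXPY_MPIAIJ() above.  SAME_NONZERO_PATTERN takes the direct BLAS path,
   SUBSET_NONZERO_PATTERN uses the cached xtoy map, and DIFFERENT_NONZERO_PATTERN rebuilds
   Y with the merged preallocation computed by MatAXPYGetPreallocation_MPIAIJ().

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/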
2258 
2259 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatConjugate_MPIAIJ"
2263 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2264 {
2265 #if defined(PETSC_USE_COMPLEX)
2266   PetscErrorCode ierr;
2267   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2271   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2272 #else
2273   PetscFunctionBegin;
2274 #endif
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatRealPart_MPIAIJ"
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #if defined(PETSC_HAVE_PBGL)
2305 
2306 #include <boost/parallel/mpi/bsp_process_group.hpp>
2307 #include <boost/graph/distributed/ilu_default_graph.hpp>
2308 #include <boost/graph/distributed/ilu_0_block.hpp>
2309 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2310 #include <boost/graph/distributed/petsc/interface.hpp>
2311 #include <boost/multi_array.hpp>
2312 #include <boost/parallel/distributed_property_map.hpp>
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2316 /*
2317   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2318 */
2319 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2320 {
2321   namespace petsc = boost::distributed::petsc;
2322 
2323   namespace graph_dist = boost::graph::distributed;
2324   using boost::graph::distributed::ilu_default::process_group_type;
2325   using boost::graph::ilu_permuted;
2326 
2327   PetscBool       row_identity, col_identity;
2328   PetscContainer  c;
2329   PetscInt        m, n, M, N;
2330   PetscErrorCode  ierr;
2331 
2332   PetscFunctionBegin;
2333   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2334   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2335   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2336   if (!row_identity || !col_identity) {
2337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2338   }
2339 
2340   process_group_type pg;
2341   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2342   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2343   lgraph_type&   level_graph = *lgraph_p;
2344   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2345 
2346   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2347   ilu_permuted(level_graph);
2348 
2349   /* put together the new matrix */
2350   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2351   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2352   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2353   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2354   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2355   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2356   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357 
2358   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2359   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2360   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2361   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2367 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2368 {
2369   PetscFunctionBegin;
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatSolve_MPIAIJ"
2375 /*
2376   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2377 */
2378 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2379 {
2380   namespace graph_dist = boost::graph::distributed;
2381 
2382   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2383   lgraph_type*   lgraph_p;
2384   PetscContainer c;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2389   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2390   ierr = VecCopy(b, x);CHKERRQ(ierr);
2391 
2392   PetscScalar* array_x;
2393   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2394   PetscInt sx;
2395   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2396 
2397   PetscScalar* array_b;
2398   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2399   PetscInt sb;
2400   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2401 
2402   lgraph_type&   level_graph = *lgraph_p;
2403   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2404 
2405   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2406   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2407                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2408 
2409   typedef boost::iterator_property_map<array_ref_type::iterator,
2410                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2411   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2412                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2413 
2414   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2415 
2416   PetscFunctionReturn(0);
2417 }
2418 #endif
2419 
2420 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2421   PetscInt       nzlocal,nsends,nrecvs;
2422   PetscMPIInt    *send_rank,*recv_rank;
2423   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2424   PetscScalar    *sbuf_a,**rbuf_a;
2425   PetscErrorCode (*Destroy)(Mat);
2426 } Mat_Redundant;
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2430 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2431 {
2432   PetscErrorCode       ierr;
2433   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2434   PetscInt             i;
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2438   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2439   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2440   for (i=0; i<redund->nrecvs; i++){
2441     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2442     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2443   }
2444   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2445   ierr = PetscFree(redund);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatDestroy_MatRedundant"
2451 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2452 {
2453   PetscErrorCode  ierr;
2454   PetscContainer  container;
2455   Mat_Redundant   *redund=PETSC_NULL;
2456 
2457   PetscFunctionBegin;
2458   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2459   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2460   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2461   A->ops->destroy = redund->Destroy;
2462   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2463   if (A->ops->destroy) {
2464     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2471 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2472 {
2473   PetscMPIInt    rank,size;
2474   MPI_Comm       comm=((PetscObject)mat)->comm;
2475   PetscErrorCode ierr;
2476   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2477   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2478   PetscInt       *rowrange=mat->rmap->range;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2480   Mat            A=aij->A,B=aij->B,C=*matredundant;
2481   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2482   PetscScalar    *sbuf_a;
2483   PetscInt       nzlocal=a->nz+b->nz;
2484   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2485   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2486   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2487   MatScalar      *aworkA,*aworkB;
2488   PetscScalar    *vals;
2489   PetscMPIInt    tag1,tag2,tag3,imdex;
2490   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2491                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2492   MPI_Status     recv_status,*send_status;
2493   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2494   PetscInt       **rbuf_j=PETSC_NULL;
2495   PetscScalar    **rbuf_a=PETSC_NULL;
2496   Mat_Redundant  *redund=PETSC_NULL;
2497   PetscContainer container;
2498 
2499   PetscFunctionBegin;
2500   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502 
2503   if (reuse == MAT_REUSE_MATRIX) {
2504     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2505     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2506     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2507     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2508     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2509     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2510     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2511     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2512 
2513     nsends    = redund->nsends;
2514     nrecvs    = redund->nrecvs;
2515     send_rank = redund->send_rank;
2516     recv_rank = redund->recv_rank;
2517     sbuf_nz   = redund->sbuf_nz;
2518     rbuf_nz   = redund->rbuf_nz;
2519     sbuf_j    = redund->sbuf_j;
2520     sbuf_a    = redund->sbuf_a;
2521     rbuf_j    = redund->rbuf_j;
2522     rbuf_a    = redund->rbuf_a;
2523   }
2524 
2525   if (reuse == MAT_INITIAL_MATRIX){
2526     PetscMPIInt  subrank,subsize;
2527     PetscInt     nleftover,np_subcomm;
2528     /* get the destination processors' id send_rank, nsends and nrecvs */
2529     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2530     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2531     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2532     np_subcomm = size/nsubcomm;
2533     nleftover  = size - nsubcomm*np_subcomm;
2534     nsends = 0; nrecvs = 0;
2535     for (i=0; i<size; i++){ /* i=rank*/
2536       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2537         send_rank[nsends] = i; nsends++;
2538         recv_rank[nrecvs++] = i;
2539       }
2540     }
2541     if (rank >= size - nleftover){/* this proc is a leftover processor */
2542       i = size-nleftover-1;
2543       j = 0;
2544       while (j < nsubcomm - nleftover){
2545         send_rank[nsends++] = i;
2546         i--; j++;
2547       }
2548     }
2549 
2550     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2551       for (i=0; i<nleftover; i++){
2552         recv_rank[nrecvs++] = size-nleftover+i;
2553       }
2554     }
2555 
2556     /* allocate sbuf_j, sbuf_a */
2557     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2558     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2559     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2560   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2561 
2562   /* copy mat's local entries into the buffers */
2563   if (reuse == MAT_INITIAL_MATRIX){
2564     rownz_max = 0;
2565     rptr = sbuf_j;
2566     cols = sbuf_j + rend-rstart + 1;
2567     vals = sbuf_a;
2568     rptr[0] = 0;
2569     for (i=0; i<rend-rstart; i++){
2570       row = i + rstart;
2571       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2572       ncols  = nzA + nzB;
2573       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2574       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2575       /* load the column indices for this row into cols */
2576       lwrite = 0;
2577       for (l=0; l<nzB; l++) {
2578         if ((ctmp = bmap[cworkB[l]]) < cstart){
2579           vals[lwrite]   = aworkB[l];
2580           cols[lwrite++] = ctmp;
2581         }
2582       }
2583       for (l=0; l<nzA; l++){
2584         vals[lwrite]   = aworkA[l];
2585         cols[lwrite++] = cstart + cworkA[l];
2586       }
2587       for (l=0; l<nzB; l++) {
2588         if ((ctmp = bmap[cworkB[l]]) >= cend){
2589           vals[lwrite]   = aworkB[l];
2590           cols[lwrite++] = ctmp;
2591         }
2592       }
2593       vals += ncols;
2594       cols += ncols;
2595       rptr[i+1] = rptr[i] + ncols;
2596       if (rownz_max < ncols) rownz_max = ncols;
2597     }
2598     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2599   } else { /* only copy matrix values into sbuf_a */
2600     rptr = sbuf_j;
2601     vals = sbuf_a;
2602     rptr[0] = 0;
2603     for (i=0; i<rend-rstart; i++){
2604       row = i + rstart;
2605       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2606       ncols  = nzA + nzB;
2607       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2608       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2609       lwrite = 0;
2610       for (l=0; l<nzB; l++) {
2611         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2612       }
2613       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2616       }
2617       vals += ncols;
2618       rptr[i+1] = rptr[i] + ncols;
2619     }
2620   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2621 
2622   /* send nzlocal to others, and recv other's nzlocal */
2623   /*--------------------------------------------------*/
2624   if (reuse == MAT_INITIAL_MATRIX){
2625     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2626     s_waits2 = s_waits3 + nsends;
2627     s_waits1 = s_waits2 + nsends;
2628     r_waits1 = s_waits1 + nsends;
2629     r_waits2 = r_waits1 + nrecvs;
2630     r_waits3 = r_waits2 + nrecvs;
2631   } else {
2632     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2633     r_waits3 = s_waits3 + nsends;
2634   }
2635 
2636   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2637   if (reuse == MAT_INITIAL_MATRIX){
2638     /* get new tags to keep the communication clean */
2639     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2640     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2641     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2642 
2643     /* post receives of other's nzlocal */
2644     for (i=0; i<nrecvs; i++){
2645       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2646     }
2647     /* send nzlocal to others */
2648     for (i=0; i<nsends; i++){
2649       sbuf_nz[i] = nzlocal;
2650       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2651     }
2652     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2653     count = nrecvs;
2654     while (count) {
2655       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2656       recv_rank[imdex] = recv_status.MPI_SOURCE;
2657       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2658       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2659 
2660       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2661       rbuf_nz[imdex] += i + 2;
2662       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2663       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2664       count--;
2665     }
2666     /* wait on sends of nzlocal */
2667     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2668     /* send mat->i,j to others, and recv from other's */
2669     /*------------------------------------------------*/
2670     for (i=0; i<nsends; i++){
2671       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2672       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2673     }
2674     /* wait on receives of mat->i,j */
2675     /*------------------------------*/
2676     count = nrecvs;
2677     while (count) {
2678       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2679       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2680       count--;
2681     }
2682     /* wait on sends of mat->i,j */
2683     /*---------------------------*/
2684     if (nsends) {
2685       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2686     }
2687   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2688 
2689   /* post receives, send and receive mat->a */
2690   /*----------------------------------------*/
2691   for (imdex=0; imdex<nrecvs; imdex++) {
2692     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2693   }
2694   for (i=0; i<nsends; i++){
2695     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2696   }
2697   count = nrecvs;
2698   while (count) {
2699     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2700     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2701     count--;
2702   }
2703   if (nsends) {
2704     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2705   }
2706 
2707   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2708 
2709   /* create redundant matrix */
2710   /*-------------------------*/
2711   if (reuse == MAT_INITIAL_MATRIX){
2712     /* compute rownz_max for preallocation */
2713     for (imdex=0; imdex<nrecvs; imdex++){
2714       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2715       rptr = rbuf_j[imdex];
2716       for (i=0; i<j; i++){
2717         ncols = rptr[i+1] - rptr[i];
2718         if (rownz_max < ncols) rownz_max = ncols;
2719       }
2720     }
2721 
2722     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2723     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2724     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2725     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2726     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2727   } else {
2728     C = *matredundant;
2729   }
2730 
2731   /* insert local matrix entries */
2732   rptr = sbuf_j;
2733   cols = sbuf_j + rend-rstart + 1;
2734   vals = sbuf_a;
2735   for (i=0; i<rend-rstart; i++){
2736     row   = i + rstart;
2737     ncols = rptr[i+1] - rptr[i];
2738     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2739     vals += ncols;
2740     cols += ncols;
2741   }
2742   /* insert received matrix entries */
2743   for (imdex=0; imdex<nrecvs; imdex++){
2744     rstart = rowrange[recv_rank[imdex]];
2745     rend   = rowrange[recv_rank[imdex]+1];
2746     rptr = rbuf_j[imdex];
2747     cols = rbuf_j[imdex] + rend-rstart + 1;
2748     vals = rbuf_a[imdex];
2749     for (i=0; i<rend-rstart; i++){
2750       row   = i + rstart;
2751       ncols = rptr[i+1] - rptr[i];
2752       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2753       vals += ncols;
2754       cols += ncols;
2755     }
2756   }
2757   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2760   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2761   if (reuse == MAT_INITIAL_MATRIX) {
2762     PetscContainer container;
2763     *matredundant = C;
2764     /* create a supporting struct and attach it to C for reuse */
2765     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2766     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2767     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2768     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2769     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2770     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2771 
2772     redund->nzlocal = nzlocal;
2773     redund->nsends  = nsends;
2774     redund->nrecvs  = nrecvs;
2775     redund->send_rank = send_rank;
2776     redund->recv_rank = recv_rank;
2777     redund->sbuf_nz = sbuf_nz;
2778     redund->rbuf_nz = rbuf_nz;
2779     redund->sbuf_j  = sbuf_j;
2780     redund->sbuf_a  = sbuf_a;
2781     redund->rbuf_j  = rbuf_j;
2782     redund->rbuf_a  = rbuf_a;
2783 
2784     redund->Destroy = C->ops->destroy;
2785     C->ops->destroy = MatDestroy_MatRedundant;
2786   }
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2793 {
2794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2795   PetscErrorCode ierr;
2796   PetscInt       i,*idxb = 0;
2797   PetscScalar    *va,*vb;
2798   Vec            vtmp;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2802   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2803   if (idx) {
2804     for (i=0; i<A->rmap->n; i++) {
2805       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2806     }
2807   }
2808 
2809   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2810   if (idx) {
2811     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2812   }
2813   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2814   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2815 
2816   for (i=0; i<A->rmap->n; i++){
2817     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2818       va[i] = vb[i];
2819       if (idx) idx[i] = a->garray[idxb[i]];
2820     }
2821   }
2822 
2823   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2824   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2825   ierr = PetscFree(idxb);CHKERRQ(ierr);
2826   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2833 {
2834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2835   PetscErrorCode ierr;
2836   PetscInt       i,*idxb = 0;
2837   PetscScalar    *va,*vb;
2838   Vec            vtmp;
2839 
2840   PetscFunctionBegin;
2841   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2842   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2843   if (idx) {
2844     for (i=0; i<A->rmap->n; i++) {
2845       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2846     }
2847   }
2848 
2849   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2850   if (idx) {
2851     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2852   }
2853   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2854   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2855 
2856   for (i=0; i<A->rmap->n; i++){
2857     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2858       va[i] = vb[i];
2859       if (idx) idx[i] = a->garray[idxb[i]];
2860     }
2861   }
2862 
2863   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2865   ierr = PetscFree(idxb);CHKERRQ(ierr);
2866   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2873 {
2874   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2875   PetscInt       n      = A->rmap->n;
2876   PetscInt       cstart = A->cmap->rstart;
2877   PetscInt      *cmap   = mat->garray;
2878   PetscInt      *diagIdx, *offdiagIdx;
2879   Vec            diagV, offdiagV;
2880   PetscScalar   *a, *diagA, *offdiagA;
2881   PetscInt       r;
2882   PetscErrorCode ierr;
2883 
2884   PetscFunctionBegin;
2885   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2886   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2887   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2888   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2889   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2890   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2891   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2892   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2893   for(r = 0; r < n; ++r) {
2894     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2895       a[r]   = diagA[r];
2896       idx[r] = cstart + diagIdx[r];
2897     } else {
2898       a[r]   = offdiagA[r];
2899       idx[r] = cmap[offdiagIdx[r]];
2900     }
2901   }
2902   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2903   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2904   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2905   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2906   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2907   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914 {
2915   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2916   PetscInt       n      = A->rmap->n;
2917   PetscInt       cstart = A->cmap->rstart;
2918   PetscInt      *cmap   = mat->garray;
2919   PetscInt      *diagIdx, *offdiagIdx;
2920   Vec            diagV, offdiagV;
2921   PetscScalar   *a, *diagA, *offdiagA;
2922   PetscInt       r;
2923   PetscErrorCode ierr;
2924 
2925   PetscFunctionBegin;
2926   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2927   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2928   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2929   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2930   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2932   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2933   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2934   for(r = 0; r < n; ++r) {
2935     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2936       a[r]   = diagA[r];
2937       idx[r] = cstart + diagIdx[r];
2938     } else {
2939       a[r]   = offdiagA[r];
2940       idx[r] = cmap[offdiagIdx[r]];
2941     }
2942   }
2943   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2944   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2946   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2947   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2948   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2954 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2955 {
2956   PetscErrorCode ierr;
2957   Mat            *dummy;
2958 
2959   PetscFunctionBegin;
2960   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2961   *newmat = *dummy;
2962   ierr = PetscFree(dummy);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2970 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2971 {
2972   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 
2981 /* -------------------------------------------------------------------*/
2982 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2983        MatGetRow_MPIAIJ,
2984        MatRestoreRow_MPIAIJ,
2985        MatMult_MPIAIJ,
2986 /* 4*/ MatMultAdd_MPIAIJ,
2987        MatMultTranspose_MPIAIJ,
2988        MatMultTransposeAdd_MPIAIJ,
2989 #ifdef PETSC_HAVE_PBGL
2990        MatSolve_MPIAIJ,
2991 #else
2992        0,
2993 #endif
2994        0,
2995        0,
2996 /*10*/ 0,
2997        0,
2998        0,
2999        MatSOR_MPIAIJ,
3000        MatTranspose_MPIAIJ,
3001 /*15*/ MatGetInfo_MPIAIJ,
3002        MatEqual_MPIAIJ,
3003        MatGetDiagonal_MPIAIJ,
3004        MatDiagonalScale_MPIAIJ,
3005        MatNorm_MPIAIJ,
3006 /*20*/ MatAssemblyBegin_MPIAIJ,
3007        MatAssemblyEnd_MPIAIJ,
3008        MatSetOption_MPIAIJ,
3009        MatZeroEntries_MPIAIJ,
3010 /*24*/ MatZeroRows_MPIAIJ,
3011        0,
3012 #ifdef PETSC_HAVE_PBGL
3013        0,
3014 #else
3015        0,
3016 #endif
3017        0,
3018        0,
3019 /*29*/ MatSetUp_MPIAIJ,
3020 #ifdef PETSC_HAVE_PBGL
3021        0,
3022 #else
3023        0,
3024 #endif
3025        0,
3026        0,
3027        0,
3028 /*34*/ MatDuplicate_MPIAIJ,
3029        0,
3030        0,
3031        0,
3032        0,
3033 /*39*/ MatAXPY_MPIAIJ,
3034        MatGetSubMatrices_MPIAIJ,
3035        MatIncreaseOverlap_MPIAIJ,
3036        MatGetValues_MPIAIJ,
3037        MatCopy_MPIAIJ,
3038 /*44*/ MatGetRowMax_MPIAIJ,
3039        MatScale_MPIAIJ,
3040        0,
3041        0,
3042        MatZeroRowsColumns_MPIAIJ,
3043 /*49*/ 0,
3044        0,
3045        0,
3046        0,
3047        0,
3048 /*54*/ MatFDColoringCreate_MPIAIJ,
3049        0,
3050        MatSetUnfactored_MPIAIJ,
3051        MatPermute_MPIAIJ,
3052        0,
3053 /*59*/ MatGetSubMatrix_MPIAIJ,
3054        MatDestroy_MPIAIJ,
3055        MatView_MPIAIJ,
3056        0,
3057        0,
3058 /*64*/ 0,
3059        0,
3060        0,
3061        0,
3062        0,
3063 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3064        MatGetRowMinAbs_MPIAIJ,
3065        0,
3066        MatSetColoring_MPIAIJ,
3067 #if defined(PETSC_HAVE_ADIC)
3068        MatSetValuesAdic_MPIAIJ,
3069 #else
3070        0,
3071 #endif
3072        MatSetValuesAdifor_MPIAIJ,
3073 /*75*/ MatFDColoringApply_AIJ,
3074        0,
3075        0,
3076        0,
3077        0,
3078 /*80*/ 0,
3079        0,
3080        0,
3081 /*83*/ MatLoad_MPIAIJ,
3082        0,
3083        0,
3084        0,
3085        0,
3086        0,
3087 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3088        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3089        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3090        MatPtAP_Basic,
3091        MatPtAPSymbolic_MPIAIJ,
3092 /*94*/ MatPtAPNumeric_MPIAIJ,
3093        0,
3094        0,
3095        0,
3096        0,
3097 /*99*/ 0,
3098        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3099        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3100        MatConjugate_MPIAIJ,
3101        0,
3102 /*104*/MatSetValuesRow_MPIAIJ,
3103        MatRealPart_MPIAIJ,
3104        MatImaginaryPart_MPIAIJ,
3105        0,
3106        0,
3107 /*109*/0,
3108        MatGetRedundantMatrix_MPIAIJ,
3109        MatGetRowMin_MPIAIJ,
3110        0,
3111        0,
3112 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3113        0,
3114        0,
3115        0,
3116        0,
3117 /*119*/0,
3118        0,
3119        0,
3120        0,
3121        MatGetMultiProcBlock_MPIAIJ,
3122 /*124*/MatFindNonZeroRows_MPIAIJ,
3123        MatGetColumnNorms_MPIAIJ,
3124        MatInvertBlockDiagonal_MPIAIJ,
3125        0,
3126        MatGetSubMatricesParallel_MPIAIJ,
3127 /*129*/0,
3128        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3129        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3130        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3131        0,
3132 /*134*/0,
3133        0,
3134        0,
3135        0,
3136        0
3137 };
3138 
3139 /* ----------------------------------------------------------------------------------------*/
3140 
3141 EXTERN_C_BEGIN
3142 #undef __FUNCT__
3143 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3144 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3145 {
3146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3147   PetscErrorCode ierr;
3148 
3149   PetscFunctionBegin;
3150   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3151   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3152   PetscFunctionReturn(0);
3153 }
3154 EXTERN_C_END
3155 
3156 EXTERN_C_BEGIN
3157 #undef __FUNCT__
3158 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3159 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3160 {
3161   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3162   PetscErrorCode ierr;
3163 
3164   PetscFunctionBegin;
3165   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3166   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3167   PetscFunctionReturn(0);
3168 }
3169 EXTERN_C_END
3170 
3171 EXTERN_C_BEGIN
3172 #undef __FUNCT__
3173 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3174 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3175 {
3176   Mat_MPIAIJ     *b;
3177   PetscErrorCode ierr;
3178   PetscInt       i;
3179   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3180 
3181   PetscFunctionBegin;
3182   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3183   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3184   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3185   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3186   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3187   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3188 
3189   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3190   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3191   if (d_nnz) {
3192     for (i=0; i<B->rmap->n; i++) {
3193       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3194     }
3195   }
3196   if (o_nnz) {
3197     for (i=0; i<B->rmap->n; i++) {
3198       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3199     }
3200   }
3201   b = (Mat_MPIAIJ*)B->data;
3202 
3203   if (!B->preallocated) {
3204     /* Explicitly create 2 MATSEQAIJ matrices. */
3205     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3206     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3207     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3208     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3209     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3210     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3211     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3212     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3213   }
3214 
3215   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3216   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3217   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3218   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3219   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3220   B->preallocated = PETSC_TRUE;
3221   PetscFunctionReturn(0);
3222 }
3223 EXTERN_C_END
3224 
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3227 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3228 {
3229   Mat            mat;
3230   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3231   PetscErrorCode ierr;
3232 
3233   PetscFunctionBegin;
3234   *newmat       = 0;
3235   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3236   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3237   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3238   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3239   a    = (Mat_MPIAIJ*)mat->data;
3240 
3241   mat->factortype    = matin->factortype;
3242   mat->rmap->bs      = matin->rmap->bs;
3243   mat->assembled    = PETSC_TRUE;
3244   mat->insertmode   = NOT_SET_VALUES;
3245   mat->preallocated = PETSC_TRUE;
3246 
3247   a->size           = oldmat->size;
3248   a->rank           = oldmat->rank;
3249   a->donotstash     = oldmat->donotstash;
3250   a->roworiented    = oldmat->roworiented;
3251   a->rowindices     = 0;
3252   a->rowvalues      = 0;
3253   a->getrowactive   = PETSC_FALSE;
3254 
3255   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3256   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3257 
3258   if (oldmat->colmap) {
3259 #if defined (PETSC_USE_CTABLE)
3260     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3261 #else
3262     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3263     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3264     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3265 #endif
3266   } else a->colmap = 0;
3267   if (oldmat->garray) {
3268     PetscInt len;
3269     len  = oldmat->B->cmap->n;
3270     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3271     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3272     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3273   } else a->garray = 0;
3274 
3275   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3276   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3277   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3278   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3279   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3280   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3281   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3282   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3283   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3284   *newmat = mat;
3285   PetscFunctionReturn(0);
3286 }
3287 
3288 
3289 
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatLoad_MPIAIJ"
3292 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3293 {
3294   PetscScalar    *vals,*svals;
3295   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3296   PetscErrorCode ierr;
3297   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3298   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3299   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3300   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3301   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3302   int            fd;
3303 
3304   PetscFunctionBegin;
3305   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3306   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3307   if (!rank) {
3308     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3309     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3310     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3311   }
3312 
3313   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3314 
3315   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3316   M = header[1]; N = header[2];
3317   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3318   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3319   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3320 
3321   /* If global sizes are set, check if they are consistent with that given in the file */
3322   if (sizesset) {
3323     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3324   }
3325   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3326   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3327 
3328   /* determine ownership of all rows */
3329   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3330   else m = newMat->rmap->n; /* Set by user */
3331 
3332   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3333   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3334 
3335   /* First process needs enough room for process with most rows */
3336   if (!rank) {
3337     mmax       = rowners[1];
3338     for (i=2; i<=size; i++) {
3339       mmax = PetscMax(mmax,rowners[i]);
3340     }
3341   } else mmax = m;
3342 
3343   rowners[0] = 0;
3344   for (i=2; i<=size; i++) {
3345     rowners[i] += rowners[i-1];
3346   }
3347   rstart = rowners[rank];
3348   rend   = rowners[rank+1];
3349 
3350   /* distribute row lengths to all processors */
3351   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3352   if (!rank) {
3353     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3354     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3355     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3356     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3357     for (j=0; j<m; j++) {
3358       procsnz[0] += ourlens[j];
3359     }
3360     for (i=1; i<size; i++) {
3361       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3362       /* calculate the number of nonzeros on each processor */
3363       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3364         procsnz[i] += rowlengths[j];
3365       }
3366       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3367     }
3368     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3369   } else {
3370     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3371   }
3372 
3373   if (!rank) {
3374     /* determine max buffer needed and allocate it */
3375     maxnz = 0;
3376     for (i=0; i<size; i++) {
3377       maxnz = PetscMax(maxnz,procsnz[i]);
3378     }
3379     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3380 
3381     /* read in my part of the matrix column indices  */
3382     nz   = procsnz[0];
3383     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3384     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3385     /* read in everyone else's part and ship it off */
3386     /* read in every one elses and ship off */
3387     for (i=1; i<size; i++) {
3388       nz     = procsnz[i];
3389       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3390       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3391     }
3392     ierr = PetscFree(cols);CHKERRQ(ierr);
3393   } else {
3394     /* determine buffer space needed for message */
3395     nz = 0;
3396     for (i=0; i<m; i++) {
3397       nz += ourlens[i];
3398     }
3399     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3400 
3401     /* receive message of column indices*/
3402     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3403   }
3404 
3405   /* determine column ownership if matrix is not square */
3406   if (N != M) {
3407     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3408     else n = newMat->cmap->n;
3409     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3410     cstart = cend - n;
3411   } else {
3412     cstart = rstart;
3413     cend   = rend;
3414     n      = cend - cstart;
3415   }
3416 
3417   /* loop over local rows, determining number of off diagonal entries */
3418   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3419   jj = 0;
3420   for (i=0; i<m; i++) {
3421     for (j=0; j<ourlens[i]; j++) {
3422       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3423       jj++;
3424     }
3425   }
3426 
3427   for (i=0; i<m; i++) {
3428     ourlens[i] -= offlens[i];
3429   }
3430   if (!sizesset) {
3431     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3432   }
3433   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3434 
3435   for (i=0; i<m; i++) {
3436     ourlens[i] += offlens[i];
3437   }
3438 
3439   if (!rank) {
3440     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3441 
3442     /* read in my part of the matrix numerical values  */
3443     nz   = procsnz[0];
3444     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3445 
3446     /* insert into matrix */
3447     jj      = rstart;
3448     smycols = mycols;
3449     svals   = vals;
3450     for (i=0; i<m; i++) {
3451       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3452       smycols += ourlens[i];
3453       svals   += ourlens[i];
3454       jj++;
3455     }
3456 
3457     /* read in other processors and ship out */
3458     for (i=1; i<size; i++) {
3459       nz     = procsnz[i];
3460       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3461       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3462     }
3463     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3464   } else {
3465     /* receive numeric values */
3466     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3467 
3468     /* receive message of values*/
3469     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3470 
3471     /* insert into matrix */
3472     jj      = rstart;
3473     smycols = mycols;
3474     svals   = vals;
3475     for (i=0; i<m; i++) {
3476       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3477       smycols += ourlens[i];
3478       svals   += ourlens[i];
3479       jj++;
3480     }
3481   }
3482   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3483   ierr = PetscFree(vals);CHKERRQ(ierr);
3484   ierr = PetscFree(mycols);CHKERRQ(ierr);
3485   ierr = PetscFree(rowners);CHKERRQ(ierr);
3486 
3487   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3488   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3494 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3495 {
3496   PetscErrorCode ierr;
3497   IS             iscol_local;
3498   PetscInt       csize;
3499 
3500   PetscFunctionBegin;
3501   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3502   if (call == MAT_REUSE_MATRIX) {
3503     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3504     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3505   } else {
3506     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3507   }
3508   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3509   if (call == MAT_INITIAL_MATRIX) {
3510     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3511     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3512   }
3513   PetscFunctionReturn(0);
3514 }
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3518 /*
3519     Not great since it makes two copies of the submatrix: first a SeqAIJ
3520   locally, and then the end result obtained by concatenating the local matrices.
3521   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3522 
3523   Note: This requires a sequential iscol with all indices.
3524 */
3525 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3526 {
3527   PetscErrorCode ierr;
3528   PetscMPIInt    rank,size;
3529   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3530   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3531   Mat            *local,M,Mreuse;
3532   MatScalar      *vwork,*aa;
3533   MPI_Comm       comm = ((PetscObject)mat)->comm;
3534   Mat_SeqAIJ     *aij;
3535 
3536 
3537   PetscFunctionBegin;
3538   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3539   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3540 
3541   if (call ==  MAT_REUSE_MATRIX) {
3542     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3543     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3544     local = &Mreuse;
3545     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3546   } else {
3547     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3548     Mreuse = *local;
3549     ierr   = PetscFree(local);CHKERRQ(ierr);
3550   }
3551 
3552   /*
3553       m - number of local rows
3554       n - number of columns (same on all processors)
3555       rstart - first row in new global matrix generated
3556   */
3557   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3558   if (call == MAT_INITIAL_MATRIX) {
3559     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3560     ii  = aij->i;
3561     jj  = aij->j;
3562 
3563     /*
3564         Determine the number of non-zeros in the diagonal and off-diagonal
3565         portions of the matrix in order to do correct preallocation
3566     */
3567 
3568     /* first get start and end of "diagonal" columns */
3569     if (csize == PETSC_DECIDE) {
3570       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3571       if (mglobal == n) { /* square matrix */
3572 	nlocal = m;
3573       } else {
3574         nlocal = n/size + ((n % size) > rank);
3575       }
3576     } else {
3577       nlocal = csize;
3578     }
3579     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3580     rstart = rend - nlocal;
3581     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3582 
3583     /* next, compute all the lengths */
3584     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3585     olens = dlens + m;
3586     for (i=0; i<m; i++) {
3587       jend = ii[i+1] - ii[i];
3588       olen = 0;
3589       dlen = 0;
3590       for (j=0; j<jend; j++) {
3591         if (*jj < rstart || *jj >= rend) olen++;
3592         else dlen++;
3593         jj++;
3594       }
3595       olens[i] = olen;
3596       dlens[i] = dlen;
3597     }
3598     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3599     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3600     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3601     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3602     ierr = PetscFree(dlens);CHKERRQ(ierr);
3603   } else {
3604     PetscInt ml,nl;
3605 
3606     M = *newmat;
3607     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3608     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3609     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3610     /*
3611          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3612        rather than the slower MatSetValues().
3613     */
3614     M->was_assembled = PETSC_TRUE;
3615     M->assembled     = PETSC_FALSE;
3616   }
3617   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3618   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3619   ii  = aij->i;
3620   jj  = aij->j;
3621   aa  = aij->a;
3622   for (i=0; i<m; i++) {
3623     row   = rstart + i;
3624     nz    = ii[i+1] - ii[i];
3625     cwork = jj;     jj += nz;
3626     vwork = aa;     aa += nz;
3627     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3628   }
3629 
3630   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   *newmat = M;
3633 
3634   /* save submatrix used in processor for next request */
3635   if (call ==  MAT_INITIAL_MATRIX) {
3636     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3637     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3638   }
3639 
3640   PetscFunctionReturn(0);
3641 }
3642 
3643 EXTERN_C_BEGIN
3644 #undef __FUNCT__
3645 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3646 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3647 {
3648   PetscInt       m,cstart, cend,j,nnz,i,d;
3649   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3650   const PetscInt *JJ;
3651   PetscScalar    *values;
3652   PetscErrorCode ierr;
3653 
3654   PetscFunctionBegin;
3655   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3656 
3657   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3658   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3659   m      = B->rmap->n;
3660   cstart = B->cmap->rstart;
3661   cend   = B->cmap->rend;
3662   rstart = B->rmap->rstart;
3663 
3664   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3665 
3666 #if defined(PETSC_USE_DEBUG)
3667   for (i=0; i<m; i++) {
3668     nnz     = Ii[i+1]- Ii[i];
3669     JJ      = J + Ii[i];
3670     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3671     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3672     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3673   }
3674 #endif
3675 
3676   for (i=0; i<m; i++) {
3677     nnz     = Ii[i+1]- Ii[i];
3678     JJ      = J + Ii[i];
3679     nnz_max = PetscMax(nnz_max,nnz);
3680     d       = 0;
3681     for (j=0; j<nnz; j++) {
3682       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3683     }
3684     d_nnz[i] = d;
3685     o_nnz[i] = nnz - d;
3686   }
3687   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3688   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3689 
3690   if (v) values = (PetscScalar*)v;
3691   else {
3692     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3693     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3694   }
3695 
3696   for (i=0; i<m; i++) {
3697     ii   = i + rstart;
3698     nnz  = Ii[i+1]- Ii[i];
3699     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3700   }
3701   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3703 
3704   if (!v) {
3705     ierr = PetscFree(values);CHKERRQ(ierr);
3706   }
3707   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3708   PetscFunctionReturn(0);
3709 }
3710 EXTERN_C_END
3711 
3712 #undef __FUNCT__
3713 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3714 /*@
3715    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3716    (the default parallel PETSc format).
3717 
3718    Collective on MPI_Comm
3719 
3720    Input Parameters:
3721 +  B - the matrix
3722 .  i - the indices into j for the start of each local row (starts with zero)
3723 .  j - the column indices for each local row (starts with zero)
3724 -  v - optional values in the matrix
3725 
3726    Level: developer
3727 
3728    Notes:
3729        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3730      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3731      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3732 
3733        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3734 
3735        The format used for the sparse matrix input is equivalent to a
3736     row-major ordering, i.e., for the following matrix the input data expected is
3737     as shown:
3738 
3739         1 0 0
3740         2 0 3     P0
3741        -------
3742         4 5 6     P1
3743 
3744      Process0 [P0]: rows_owned=[0,1]
3745         i =  {0,1,3}  [size = nrow+1  = 2+1]
3746         j =  {0,0,2}  [size = nz = 3]
3747         v =  {1,2,3}  [size = nz = 3]
3748 
3749      Process1 [P1]: rows_owned=[2]
3750         i =  {0,3}    [size = nrow+1  = 1+1]
3751         j =  {0,1,2}  [size = nz = 3]
3752         v =  {4,5,6}  [size = nz = 3]
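
     A minimal calling sketch for process 0 of the example above (illustrative only;
     it assumes an MPI communicator comm spanning two processes and a PetscErrorCode
     variable ierr; none of these names are part of this routine):
.vb
     Mat         B;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);  /* process 0 owns rows 0 and 1 of the 3x3 matrix */
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve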
3753 
3754 .keywords: matrix, aij, compressed row, sparse, parallel
3755 
3756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3757           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3758 @*/
3759 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3760 {
3761   PetscErrorCode ierr;
3762 
3763   PetscFunctionBegin;
3764   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3765   PetscFunctionReturn(0);
3766 }
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3770 /*@C
3771    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3772    (the default parallel PETSc format).  For good matrix assembly performance
3773    the user should preallocate the matrix storage by setting the parameters
3774    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3775    performance can be increased by more than a factor of 50.
3776 
3777    Collective on MPI_Comm
3778 
3779    Input Parameters:
3780 +  A - the matrix
3781 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3782            (same value is used for all local rows)
3783 .  d_nnz - array containing the number of nonzeros in the various rows of the
3784            DIAGONAL portion of the local submatrix (possibly different for each row)
3785            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3786            The size of this array is equal to the number of local rows, i.e 'm'.
3787            For matrices that will be factored, you must leave room for (and set)
3788            the diagonal entry even if it is zero.
3789 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3790            submatrix (same value is used for all local rows).
3791 -  o_nnz - array containing the number of nonzeros in the various rows of the
3792            OFF-DIAGONAL portion of the local submatrix (possibly different for
3793            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3794            structure. The size of this array is equal to the number
3795            of local rows, i.e 'm'.
3796 
3797    If the *_nnz parameter is given then the *_nz parameter is ignored
3798 
3799    The AIJ format (also called the Yale sparse matrix format or
3800    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3801    storage.  The stored row and column indices begin with zero.
3802    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3803 
3804    The parallel matrix is partitioned such that the first m0 rows belong to
3805    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3806    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3807 
3808    The DIAGONAL portion of the local submatrix of a processor can be defined
3809    as the submatrix obtained by extracting the part corresponding to
3810    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3811    first row that belongs to the processor, r2 is the last row belonging to
3812    this processor, and c1-c2 is the range of indices of the local part of a
3813    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3814    common case of a square matrix, the row and column ranges are the same and
3815    the DIAGONAL part is also square. The remaining portion of the local
3816    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3817 
3818    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3819 
3820    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3821    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3822    You can also run with the option -info and look for messages with the string
3823    malloc in them to see if additional memory allocation was needed.
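
   For instance, a short sketch of such a check (illustrative only; A is assumed to be
   the assembled matrix and ierr a PetscErrorCode variable):
.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g, allocated %g, used %g, unneeded %g\n",
                        info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve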
3824 
3825    Example usage:
3826 
3827    Consider the following 8x8 matrix with 34 non-zero values, that is
3828    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3829    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3830    as follows:
3831 
3832 .vb
3833             1  2  0  |  0  3  0  |  0  4
3834     Proc0   0  5  6  |  7  0  0  |  8  0
3835             9  0 10  | 11  0  0  | 12  0
3836     -------------------------------------
3837            13  0 14  | 15 16 17  |  0  0
3838     Proc1   0 18  0  | 19 20 21  |  0  0
3839             0  0  0  | 22 23  0  | 24  0
3840     -------------------------------------
3841     Proc2  25 26 27  |  0  0 28  | 29  0
3842            30  0  0  | 31 32 33  |  0 34
3843 .ve
3844 
3845    This can be represented as a collection of submatrices as:
3846 
3847 .vb
3848       A B C
3849       D E F
3850       G H I
3851 .ve
3852 
3853    Where the submatrices A,B,C are owned by proc0, D,E,F are
3854    owned by proc1, G,H,I are owned by proc2.
3855 
3856    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3857    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3858    The 'M','N' parameters are 8,8, and have the same values on all procs.
3859 
3860    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3861    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3862    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3863    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3864    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3865    matrix, and [DF] as another SeqAIJ matrix.
3866 
3867    When d_nz, o_nz parameters are specified, d_nz storage elements are
3868    allocated for every row of the local diagonal submatrix, and o_nz
3869    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3870    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in any
3871    local row of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3872    In this case, the values of d_nz,o_nz are:
3873 .vb
3874      proc0 : dnz = 2, o_nz = 2
3875      proc1 : dnz = 3, o_nz = 2
3876      proc2 : dnz = 1, o_nz = 4
3877 .ve
3878    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3879    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3880    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3881    34 values.
3882 
3883    When d_nnz, o_nnz parameters are specified, the storage is specified
3884    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3885    In the above case the values for d_nnz,o_nnz are:
3886 .vb
3887      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3888      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3889      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3890 .ve
3891    Here the space allocated is the sum of all the above values, i.e., 34, and
3892    hence the preallocation is perfect.
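
   A minimal calling sketch for proc0 of the example above (illustrative only; it assumes
   an MPI communicator comm spanning three processes and a PetscErrorCode variable ierr):
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};  /* proc0 values from the table above */

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);    /* proc0 owns 3 rows and 3 "diagonal" columns */
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve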
3893 
3894    Level: intermediate
3895 
3896 .keywords: matrix, aij, compressed row, sparse, parallel
3897 
3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3899           MPIAIJ, MatGetInfo()
3900 @*/
3901 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3902 {
3903   PetscErrorCode ierr;
3904 
3905   PetscFunctionBegin;
3906   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3907   PetscValidType(B,1);
3908   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3909   PetscFunctionReturn(0);
3910 }
3911 
3912 #undef __FUNCT__
3913 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3914 /*@
3915      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3916          in standard CSR format.
3917 
3918    Collective on MPI_Comm
3919 
3920    Input Parameters:
3921 +  comm - MPI communicator
3922 .  m - number of local rows (Cannot be PETSC_DECIDE)
3923 .  n - This value should be the same as the local size used in creating the
3924        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3925        calculated if N is given) For square matrices n is almost always m.
3926 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3927 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3928 .   i - row indices
3929 .   j - column indices
3930 -   a - matrix values
3931 
3932    Output Parameter:
3933 .   mat - the matrix
3934 
3935    Level: intermediate
3936 
3937    Notes:
3938        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3939      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3940      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3941 
3942        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3943 
3944        The format used for the sparse matrix input is equivalent to a
3945     row-major ordering, i.e., for the following matrix the input data expected is
3946     as shown:
3947 
3948         1 0 0
3949         2 0 3     P0
3950        -------
3951         4 5 6     P1
3952 
3953      Process0 [P0]: rows_owned=[0,1]
3954         i =  {0,1,3}  [size = nrow+1  = 2+1]
3955         j =  {0,0,2}  [size = nz = 3]
3956         v =  {1,2,3}  [size = nz = 3]
3957 
3958      Process1 [P1]: rows_owned=[2]
3959         i =  {0,3}    [size = nrow+1  = 1+1]
3960         j =  {0,1,2}  [size = nz = 3]
3961         v =  {4,5,6}  [size = nz = 3]
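
     A minimal calling sketch for process 0 of the example above (illustrative only;
     it assumes an MPI communicator comm spanning two processes and a PetscErrorCode
     variable ierr):
.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve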
3962 
3963 .keywords: matrix, aij, compressed row, sparse, parallel
3964 
3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3966           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3967 @*/
3968 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3969 {
3970   PetscErrorCode ierr;
3971 
3972   PetscFunctionBegin;
3973   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3974   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3975   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3976   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3977   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3978   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3979   PetscFunctionReturn(0);
3980 }
3981 
3982 #undef __FUNCT__
3983 #define __FUNCT__ "MatCreateAIJ"
3984 /*@C
3985    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3986    (the default parallel PETSc format).  For good matrix assembly performance
3987    the user should preallocate the matrix storage by setting the parameters
3988    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3989    performance can be increased by more than a factor of 50.
3990 
3991    Collective on MPI_Comm
3992 
3993    Input Parameters:
3994 +  comm - MPI communicator
3995 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3996            This value should be the same as the local size used in creating the
3997            y vector for the matrix-vector product y = Ax.
3998 .  n - This value should be the same as the local size used in creating the
3999        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4000        calculated if N is given) For square matrices n is almost always m.
4001 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4002 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4003 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4004            (same value is used for all local rows)
4005 .  d_nnz - array containing the number of nonzeros in the various rows of the
4006            DIAGONAL portion of the local submatrix (possibly different for each row)
4007            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4008            The size of this array is equal to the number of local rows, i.e 'm'.
4009 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4010            submatrix (same value is used for all local rows).
4011 -  o_nnz - array containing the number of nonzeros in the various rows of the
4012            OFF-DIAGONAL portion of the local submatrix (possibly different for
4013            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4014            structure. The size of this array is equal to the number
4015            of local rows, i.e 'm'.
4016 
4017    Output Parameter:
4018 .  A - the matrix
4019 
4020    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4021    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4022    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4023 
4024    Notes:
4025    If the *_nnz parameter is given then the *_nz parameter is ignored
4026 
4027    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4028    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4029    storage requirements for this matrix.
4030 
4031    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4032    processor then it must be used on all processors that share the object for
4033    that argument.
4034 
4035    The user MUST specify either the local or global matrix dimensions
4036    (possibly both).
4037 
4038    The parallel matrix is partitioned across processors such that the
4039    first m0 rows belong to process 0, the next m1 rows belong to
4040    process 1, the next m2 rows belong to process 2, etc., where
4041    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4042    values corresponding to an [m x N] submatrix.
4043 
4044    The columns are logically partitioned with the n0 columns belonging
4045    to the 0th partition, the next n1 columns belonging to the next
4046    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4047 
4048    The DIAGONAL portion of the local submatrix on any given processor
4049    is the submatrix corresponding to the rows and columns owned by
4050    that processor, i.e. the diagonal submatrix on process 0 is
4051    [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4052    etc. The remaining [m x (N-n)] portion of the local submatrix
4053    constitutes the OFF-DIAGONAL portion. The example below better
4054    illustrates this concept.
4055 
4056    For a square global matrix we define each processor's diagonal portion
4057    to be its local rows and the corresponding columns (a square submatrix);
4058    each processor's off-diagonal portion encompasses the remainder of the
4059    local matrix (a rectangular submatrix).
4060 
4061    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4062 
4063    When calling this routine with a single process communicator, a matrix of
4064    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4065    type of communicator, use the construction mechanism:
4066      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4067 
4068    By default, this format uses inodes (identical nodes) when possible.
4069    We search for consecutive rows with the same nonzero structure, thereby
4070    reusing matrix information to achieve increased efficiency.
4071 
4072    Options Database Keys:
4073 +  -mat_no_inode  - Do not use inodes
4074 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4075 -  -mat_aij_oneindex - Internally use indexing starting at 1
4076         rather than 0.  Note that when calling MatSetValues(),
4077         the user still MUST index entries starting at 0!
4078 
4079 
4080    Example usage:
4081 
4082    Consider the following 8x8 matrix with 34 non-zero values, that is
4083    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4084    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4085    as follows:
4086 
4087 .vb
4088             1  2  0  |  0  3  0  |  0  4
4089     Proc0   0  5  6  |  7  0  0  |  8  0
4090             9  0 10  | 11  0  0  | 12  0
4091     -------------------------------------
4092            13  0 14  | 15 16 17  |  0  0
4093     Proc1   0 18  0  | 19 20 21  |  0  0
4094             0  0  0  | 22 23  0  | 24  0
4095     -------------------------------------
4096     Proc2  25 26 27  |  0  0 28  | 29  0
4097            30  0  0  | 31 32 33  |  0 34
4098 .ve
4099 
4100    This can be represented as a collection of submatrices as:
4101 
4102 .vb
4103       A B C
4104       D E F
4105       G H I
4106 .ve
4107 
4108    Where the submatrices A,B,C are owned by proc0, D,E,F are
4109    owned by proc1, G,H,I are owned by proc2.
4110 
4111    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4112    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4113    The 'M','N' parameters are 8,8, and have the same values on all procs.
4114 
4115    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4116    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4117    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4118    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4119    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4120    matrix, and [DF] as another SeqAIJ matrix.
4121 
4122    When d_nz, o_nz parameters are specified, d_nz storage elements are
4123    allocated for every row of the local diagonal submatrix, and o_nz
4124    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4125    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4126    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4127    In this case, the values of d_nz,o_nz are:
4128 .vb
4129      proc0 : d_nz = 2, o_nz = 2
4130      proc1 : d_nz = 3, o_nz = 2
4131      proc2 : d_nz = 1, o_nz = 4
4132 .ve
4133    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4134    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4135    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4136    34 values.
4137 
4138    When d_nnz, o_nnz parameters are specified, the storage is specified
4139    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4140    In the above case the values for d_nnz,o_nnz are:
4141 .vb
4142      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4143      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4144      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4145 .ve
4146    Here the space allocated is the sum of all the above values, i.e. 34, and
4147    hence the pre-allocation is perfect.
4148 
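   For illustration only, a sketch of the recommended creation sequence for this example
   (error checking omitted); m, n, d_nnz and o_nnz denote each process's own values from
   the tables above, and the entries are then inserted with MatSetValues() followed by
   MatAssemblyBegin()/MatAssemblyEnd():

.vb
      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
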
4149    Level: intermediate
4150 
4151 .keywords: matrix, aij, compressed row, sparse, parallel
4152 
4153 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4154           MPIAIJ, MatCreateMPIAIJWithArrays()
4155 @*/
4156 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4157 {
4158   PetscErrorCode ierr;
4159   PetscMPIInt    size;
4160 
4161   PetscFunctionBegin;
4162   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4163   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4164   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4165   if (size > 1) {
4166     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4167     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4168   } else {
4169     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4170     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4171   }
4172   PetscFunctionReturn(0);
4173 }
4174 
4175 #undef __FUNCT__
4176 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4177 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4178 {
4179   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4180 
4181   PetscFunctionBegin;
4182   *Ad     = a->A;
4183   *Ao     = a->B;
4184   *colmap = a->garray;
4185   PetscFunctionReturn(0);
4186 }
4187 
4188 #undef __FUNCT__
4189 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4190 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4191 {
4192   PetscErrorCode ierr;
4193   PetscInt       i;
4194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4195 
4196   PetscFunctionBegin;
4197   if (coloring->ctype == IS_COLORING_GLOBAL) {
4198     ISColoringValue *allcolors,*colors;
4199     ISColoring      ocoloring;
4200 
4201     /* set coloring for diagonal portion */
4202     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4203 
4204     /* set coloring for off-diagonal portion */
4205     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4206     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4207     for (i=0; i<a->B->cmap->n; i++) {
4208       colors[i] = allcolors[a->garray[i]];
4209     }
4210     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4211     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4212     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4213     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4214   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4215     ISColoringValue *colors;
4216     PetscInt        *larray;
4217     ISColoring      ocoloring;
4218 
4219     /* set coloring for diagonal portion */
4220     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4221     for (i=0; i<a->A->cmap->n; i++) {
4222       larray[i] = i + A->cmap->rstart;
4223     }
4224     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4225     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4226     for (i=0; i<a->A->cmap->n; i++) {
4227       colors[i] = coloring->colors[larray[i]];
4228     }
4229     ierr = PetscFree(larray);CHKERRQ(ierr);
4230     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4231     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4232     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4233 
4234     /* set coloring for off-diagonal portion */
4235     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4236     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4237     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4238     for (i=0; i<a->B->cmap->n; i++) {
4239       colors[i] = coloring->colors[larray[i]];
4240     }
4241     ierr = PetscFree(larray);CHKERRQ(ierr);
4242     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4243     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4244     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4245   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4246 
4247   PetscFunctionReturn(0);
4248 }
4249 
4250 #if defined(PETSC_HAVE_ADIC)
4251 #undef __FUNCT__
4252 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4253 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4254 {
4255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4256   PetscErrorCode ierr;
4257 
4258   PetscFunctionBegin;
4259   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4260   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4261   PetscFunctionReturn(0);
4262 }
4263 #endif
4264 
4265 #undef __FUNCT__
4266 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4267 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4268 {
4269   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4270   PetscErrorCode ierr;
4271 
4272   PetscFunctionBegin;
4273   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4274   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4275   PetscFunctionReturn(0);
4276 }
4277 
4278 #undef __FUNCT__
4279 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4280 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4281 {
4282   PetscErrorCode ierr;
4283   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum;
4284   PetscInt       *indx;
4285 
4286   PetscFunctionBegin;
4287   /* This routine will ONLY return MPIAIJ type matrix */
4288   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4289   if (n == PETSC_DECIDE){
4290     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4291   }
4292   /* Check sum(n) = N */
4293   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4294   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4295 
4296   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4297   rstart -= m;
4298 
4299   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4300   for (i=0;i<m;i++) {
4301     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4302     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4303     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4304   }
4305 
4306   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4307   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4308   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4309   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4310   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4311   PetscFunctionReturn(0);
4312 }
4313 
4314 #undef __FUNCT__
4315 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4316 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4317 {
4318   PetscErrorCode ierr;
4319   PetscInt       m,N,i,rstart,nnz,Ii;
4320   PetscInt       *indx;
4321   PetscScalar    *values;
4322 
4323   PetscFunctionBegin;
4324   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4325   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4326   for (i=0;i<m;i++) {
4327     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4328     Ii    = i + rstart;
4329     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4330     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4331   }
4332   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4333   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4339 /*@
4340       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4341                  matrices from each processor
4342 
4343     Collective on MPI_Comm
4344 
4345    Input Parameters:
4346 +    comm - the communicator the parallel matrix will live on
4347 .    inmat - the input sequential matrix (one on each process)
4348 .    n - number of local columns (or PETSC_DECIDE)
4349 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4350 
4351    Output Parameter:
4352 .    outmat - the parallel matrix generated
4353 
4354     Level: advanced
4355 
4356    Notes: The number of columns of the matrix on EACH processor MUST be the same.
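
   A usage sketch (error checking omitted); seqA denotes a SEQAIJ matrix already assembled
   on each process with the same number of columns everywhere, and the second call reuses
   the parallel matrix after the values (but not the nonzero structure) of seqA change:

.vb
      Mat C;
      MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
      MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve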
4357 
4358 @*/
4359 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4360 {
4361   PetscErrorCode ierr;
4362 
4363   PetscFunctionBegin;
4364   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4365   if (scall == MAT_INITIAL_MATRIX){
4366     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4367   }
4368   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4369   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4370   PetscFunctionReturn(0);
4371 }
4372 
4373 #undef __FUNCT__
4374 #define __FUNCT__ "MatFileSplit"
4375 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4376 {
4377   PetscErrorCode    ierr;
4378   PetscMPIInt       rank;
4379   PetscInt          m,N,i,rstart,nnz;
4380   size_t            len;
4381   const PetscInt    *indx;
4382   PetscViewer       out;
4383   char              *name;
4384   Mat               B;
4385   const PetscScalar *values;
4386 
4387   PetscFunctionBegin;
4388   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4389   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4390   /* Should this be the type of the diagonal block of A? */
4391   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4392   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4393   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4394   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4395   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4396   for (i=0;i<m;i++) {
4397     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4398     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4399     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4400   }
4401   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4402   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4403 
4404   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4405   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4406   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4407   sprintf(name,"%s.%d",outfile,rank);
4408   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4409   ierr = PetscFree(name);CHKERRQ(ierr);
4410   ierr = MatView(B,out);CHKERRQ(ierr);
4411   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4412   ierr = MatDestroy(&B);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4419 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4420 {
4421   PetscErrorCode       ierr;
4422   Mat_Merge_SeqsToMPI  *merge;
4423   PetscContainer       container;
4424 
4425   PetscFunctionBegin;
4426   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4427   if (container) {
4428     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4434     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4435     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4441     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4442     ierr = PetscFree(merge);CHKERRQ(ierr);
4443     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4444   }
4445   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4446   PetscFunctionReturn(0);
4447 }
4448 
4449 #include <../src/mat/utils/freespace.h>
4450 #include <petscbt.h>
4451 
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4454 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4455 {
4456   PetscErrorCode       ierr;
4457   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4458   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4459   PetscMPIInt          size,rank,taga,*len_s;
4460   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4461   PetscInt             proc,m;
4462   PetscInt             **buf_ri,**buf_rj;
4463   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4464   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4465   MPI_Request          *s_waits,*r_waits;
4466   MPI_Status           *status;
4467   MatScalar            *aa=a->a;
4468   MatScalar            **abuf_r,*ba_i;
4469   Mat_Merge_SeqsToMPI  *merge;
4470   PetscContainer       container;
4471 
4472   PetscFunctionBegin;
4473   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4474 
4475   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4476   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4477 
4478   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4479   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4480 
4481   bi     = merge->bi;
4482   bj     = merge->bj;
4483   buf_ri = merge->buf_ri;
4484   buf_rj = merge->buf_rj;
4485 
4486   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4487   owners = merge->rowmap->range;
4488   len_s  = merge->len_s;
4489 
4490   /* send and recv matrix values */
4491   /*-----------------------------*/
4492   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4493   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4494 
4495   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4496   for (proc=0,k=0; proc<size; proc++){
4497     if (!len_s[proc]) continue;
4498     i = owners[proc];
4499     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4500     k++;
4501   }
4502 
4503   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4504   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4505   ierr = PetscFree(status);CHKERRQ(ierr);
4506 
4507   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4508   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4509 
4510   /* insert mat values of mpimat */
4511   /*----------------------------*/
4512   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4513   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4514 
4515   for (k=0; k<merge->nrecv; k++){
4516     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4517     nrows = *(buf_ri_k[k]);
4518     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4519     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4520   }
4521 
4522   /* set values of ba */
4523   m = merge->rowmap->n;
4524   for (i=0; i<m; i++) {
4525     arow = owners[rank] + i;
4526     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4527     bnzi = bi[i+1] - bi[i];
4528     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4529 
4530     /* add local non-zero vals of this proc's seqmat into ba */
4531     anzi = ai[arow+1] - ai[arow];
4532     aj   = a->j + ai[arow];
4533     aa   = a->a + ai[arow];
4534     nextaj = 0;
4535     for (j=0; nextaj<anzi; j++){
4536       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4537         ba_i[j] += aa[nextaj++];
4538       }
4539     }
4540 
4541     /* add received vals into ba */
4542     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4543       /* i-th row */
4544       if (i == *nextrow[k]) {
4545         anzi = *(nextai[k]+1) - *nextai[k];
4546         aj   = buf_rj[k] + *(nextai[k]);
4547         aa   = abuf_r[k] + *(nextai[k]);
4548         nextaj = 0;
4549         for (j=0; nextaj<anzi; j++){
4550           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4551             ba_i[j] += aa[nextaj++];
4552           }
4553         }
4554         nextrow[k]++; nextai[k]++;
4555       }
4556     }
4557     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4558   }
4559   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4560   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4561 
4562   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4563   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4564   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4565   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4566   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4567   PetscFunctionReturn(0);
4568 }
4569 
4570 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4571 
4572 #undef __FUNCT__
4573 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4574 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4575 {
4576   PetscErrorCode       ierr;
4577   Mat                  B_mpi;
4578   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4579   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4580   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4581   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4582   PetscInt             len,proc,*dnz,*onz;
4583   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4584   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4585   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4586   MPI_Status           *status;
4587   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4588   PetscBT              lnkbt;
4589   Mat_Merge_SeqsToMPI  *merge;
4590   PetscContainer       container;
4591 
4592   PetscFunctionBegin;
4593   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4594 
4595   /* make sure it is a PETSc comm */
4596   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4597   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4598   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4599 
4600   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4601   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4602 
4603   /* determine row ownership */
4604   /*---------------------------------------------------------*/
4605   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4606   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4607   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4608   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4610   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4611   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4612 
4613   m      = merge->rowmap->n;
4614   M      = merge->rowmap->N;
4615   owners = merge->rowmap->range;
4616 
4617   /* determine the number of messages to send, their lengths */
4618   /*---------------------------------------------------------*/
4619   len_s  = merge->len_s;
4620 
4621   len = 0;  /* length of buf_si[] */
4622   merge->nsend = 0;
4623   for (proc=0; proc<size; proc++){
4624     len_si[proc] = 0;
4625     if (proc == rank){
4626       len_s[proc] = 0;
4627     } else {
4628       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4629       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4630     }
4631     if (len_s[proc]) {
4632       merge->nsend++;
4633       nrows = 0;
4634       for (i=owners[proc]; i<owners[proc+1]; i++){
4635         if (ai[i+1] > ai[i]) nrows++;
4636       }
4637       len_si[proc] = 2*(nrows+1);
4638       len += len_si[proc];
4639     }
4640   }
4641 
4642   /* determine the number and length of messages to receive for ij-structure */
4643   /*-------------------------------------------------------------------------*/
4644   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4645   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4646 
4647   /* post the Irecv of j-structure */
4648   /*-------------------------------*/
4649   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4650   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4651 
4652   /* post the Isend of j-structure */
4653   /*--------------------------------*/
4654   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4655 
4656   for (proc=0, k=0; proc<size; proc++){
4657     if (!len_s[proc]) continue;
4658     i = owners[proc];
4659     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4660     k++;
4661   }
4662 
4663   /* receives and sends of j-structure are complete */
4664   /*------------------------------------------------*/
4665   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4666   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4667 
4668   /* send and recv i-structure */
4669   /*---------------------------*/
4670   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4671   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4672 
4673   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4674   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4675   for (proc=0,k=0; proc<size; proc++){
4676     if (!len_s[proc]) continue;
4677     /* form outgoing message for i-structure:
4678          buf_si[0]:                 nrows to be sent
4679                [1:nrows]:           row index (global)
4680                [nrows+1:2*nrows+1]: i-structure index
4681     */
4682     /*-------------------------------------------*/
4683     nrows = len_si[proc]/2 - 1;
4684     buf_si_i    = buf_si + nrows+1;
4685     buf_si[0]   = nrows;
4686     buf_si_i[0] = 0;
4687     nrows = 0;
4688     for (i=owners[proc]; i<owners[proc+1]; i++){
4689       anzi = ai[i+1] - ai[i];
4690       if (anzi) {
4691         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4692         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4693         nrows++;
4694       }
4695     }
4696     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4697     k++;
4698     buf_si += len_si[proc];
4699   }
4700 
4701   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4702   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4703 
4704   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4705   for (i=0; i<merge->nrecv; i++){
4706     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4707   }
4708 
4709   ierr = PetscFree(len_si);CHKERRQ(ierr);
4710   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4711   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4712   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4713   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4714   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4715   ierr = PetscFree(status);CHKERRQ(ierr);
4716 
4717   /* compute a local seq matrix in each processor */
4718   /*----------------------------------------------*/
4719   /* allocate bi array and free space for accumulating nonzero column info */
4720   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4721   bi[0] = 0;
4722 
4723   /* create and initialize a linked list */
4724   nlnk = N+1;
4725   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4726 
4727   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4728   len  = ai[owners[rank+1]] - ai[owners[rank]];
4730   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4731   current_space = free_space;
4732 
4733   /* determine symbolic info for each local row */
4734   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4735 
4736   for (k=0; k<merge->nrecv; k++){
4737     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4738     nrows = *buf_ri_k[k];
4739     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4740     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4741   }
4742 
4743   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4744   len = 0;
4745   for (i=0;i<m;i++) {
4746     bnzi   = 0;
4747     /* add local non-zero cols of this proc's seqmat into lnk */
4748     arow   = owners[rank] + i;
4749     anzi   = ai[arow+1] - ai[arow];
4750     aj     = a->j + ai[arow];
4751     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4752     bnzi += nlnk;
4753     /* add received col data into lnk */
4754     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4755       if (i == *nextrow[k]) { /* i-th row */
4756         anzi = *(nextai[k]+1) - *nextai[k];
4757         aj   = buf_rj[k] + *nextai[k];
4758         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4759         bnzi += nlnk;
4760         nextrow[k]++; nextai[k]++;
4761       }
4762     }
4763     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4764 
4765     /* if free space is not available, make more free space */
4766     if (current_space->local_remaining<bnzi) {
4767       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4768       nspacedouble++;
4769     }
4770     /* copy data into free space, then initialize lnk */
4771     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4772     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4773 
4774     current_space->array           += bnzi;
4775     current_space->local_used      += bnzi;
4776     current_space->local_remaining -= bnzi;
4777 
4778     bi[i+1] = bi[i] + bnzi;
4779   }
4780 
4781   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4782 
4783   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4784   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4785   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4786 
4787   /* create symbolic parallel matrix B_mpi */
4788   /*---------------------------------------*/
4789   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4790   if (n==PETSC_DECIDE) {
4791     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4792   } else {
4793     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4794   }
4795   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4796   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4797   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4798   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4799 
4800   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4801   B_mpi->assembled     = PETSC_FALSE;
4802   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4803   merge->bi            = bi;
4804   merge->bj            = bj;
4805   merge->buf_ri        = buf_ri;
4806   merge->buf_rj        = buf_rj;
4807   merge->coi           = PETSC_NULL;
4808   merge->coj           = PETSC_NULL;
4809   merge->owners_co     = PETSC_NULL;
4810 
4811   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4812 
4813   /* attach the supporting struct to B_mpi for reuse */
4814   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4815   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4816   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4817   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4818   *mpimat = B_mpi;
4819 
4820   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4821   PetscFunctionReturn(0);
4822 }
4823 
4824 #undef __FUNCT__
4825 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4826 /*@C
4827       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4828                  matrices from each processor
4829 
4830     Collective on MPI_Comm
4831 
4832    Input Parameters:
4833 +    comm - the communicator the parallel matrix will live on
4834 .    seqmat - the input sequential matrix (one on each process)
4835 .    m - number of local rows (or PETSC_DECIDE)
4836 .    n - number of local columns (or PETSC_DECIDE)
4837 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4838 
4839    Output Parameter:
4840 .    mpimat - the parallel matrix generated
4841 
4842     Level: advanced
4843 
4844    Notes:
4845      The dimensions of the sequential matrix in each processor MUST be the same.
4846      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4847      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
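
     A usage sketch (error checking omitted); localA denotes the already assembled sequential
     matrix contributed by this process, and the second call reuses the parallel matrix after
     the values (but not the nonzero structure) of localA change:
.vb
        Mat A;
        MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);
        MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&A);
.ve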
4848 @*/
4849 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4850 {
4851   PetscErrorCode   ierr;
4852   PetscMPIInt     size;
4853 
4854   PetscFunctionBegin;
4855   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4856   if (size == 1){
4857      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4858      if (scall == MAT_INITIAL_MATRIX){
4859        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4860      } else {
4861        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4862      }
4863      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4864      PetscFunctionReturn(0);
4865   }
4866   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4867   if (scall == MAT_INITIAL_MATRIX){
4868     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4869   }
4870   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4871   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 #undef __FUNCT__
4876 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4877 /*@
4878      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4879           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4880           with MatGetSize()
4881 
4882     Not Collective
4883 
4884    Input Parameters:
4885 +    A - the matrix
4886 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4887 
4888    Output Parameter:
4889 .    A_loc - the local sequential matrix generated
4890 
4891     Level: developer
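
   A usage sketch (error checking omitted); A is assumed to be an assembled MPIAIJ matrix,
   and the MAT_REUSE_MATRIX call refreshes the values of A_loc after the values of A change:
.vb
      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(&A_loc);
.ve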
4892 
4893 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4894 
4895 @*/
4896 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4897 {
4898   PetscErrorCode  ierr;
4899   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4900   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4901   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4902   MatScalar       *aa=a->a,*ba=b->a,*cam;
4903   PetscScalar     *ca;
4904   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4905   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4906   PetscBool       match;
4907 
4908   PetscFunctionBegin;
4909   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4910   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4911   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4912   if (scall == MAT_INITIAL_MATRIX){
4913     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4914     ci[0] = 0;
4915     for (i=0; i<am; i++){
4916       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4917     }
4918     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4919     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4920     k = 0;
4921     for (i=0; i<am; i++) {
4922       ncols_o = bi[i+1] - bi[i];
4923       ncols_d = ai[i+1] - ai[i];
4924       /* off-diagonal portion of A */
4925       for (jo=0; jo<ncols_o; jo++) {
4926         col = cmap[*bj];
4927         if (col >= cstart) break;
4928         cj[k]   = col; bj++;
4929         ca[k++] = *ba++;
4930       }
4931       /* diagonal portion of A */
4932       for (j=0; j<ncols_d; j++) {
4933         cj[k]   = cstart + *aj++;
4934         ca[k++] = *aa++;
4935       }
4936       /* off-diagonal portion of A */
4937       for (j=jo; j<ncols_o; j++) {
4938         cj[k]   = cmap[*bj++];
4939         ca[k++] = *ba++;
4940       }
4941     }
4942     /* put together the new matrix */
4943     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4944     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4945     /* Since these are PETSc arrays, change flags to free them as necessary. */
4946     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4947     mat->free_a  = PETSC_TRUE;
4948     mat->free_ij = PETSC_TRUE;
4949     mat->nonew   = 0;
4950   } else if (scall == MAT_REUSE_MATRIX){
4951     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4952     ci = mat->i; cj = mat->j; cam = mat->a;
4953     for (i=0; i<am; i++) {
4954       /* off-diagonal portion of A */
4955       ncols_o = bi[i+1] - bi[i];
4956       for (jo=0; jo<ncols_o; jo++) {
4957         col = cmap[*bj];
4958         if (col >= cstart) break;
4959         *cam++ = *ba++; bj++;
4960       }
4961       /* diagonal portion of A */
4962       ncols_d = ai[i+1] - ai[i];
4963       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4964       /* off-diagonal portion of A */
4965       for (j=jo; j<ncols_o; j++) {
4966         *cam++ = *ba++; bj++;
4967       }
4968     }
4969   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4970   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4971   PetscFunctionReturn(0);
4972 }
4973 
4974 #undef __FUNCT__
4975 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4976 /*@C
4977      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4978 
4979     Not Collective
4980 
4981    Input Parameters:
4982 +    A - the matrix
4983 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4984 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4985 
4986    Output Parameter:
4987 .    A_loc - the local sequential matrix generated
4988 
4989     Level: developer
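
   A usage sketch (error checking omitted); A is assumed to be an assembled MPIAIJ matrix,
   and PETSC_NULL is passed for row and col so that all local rows and all nonzero columns
   are taken:
.vb
      Mat A_loc;
      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve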
4990 
4991 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4992 
4993 @*/
4994 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4995 {
4996   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4997   PetscErrorCode    ierr;
4998   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4999   IS                isrowa,iscola;
5000   Mat               *aloc;
5001   PetscBool       match;
5002 
5003   PetscFunctionBegin;
5004   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5005   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5006   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5007   if (!row){
5008     start = A->rmap->rstart; end = A->rmap->rend;
5009     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5010   } else {
5011     isrowa = *row;
5012   }
5013   if (!col){
5014     start = A->cmap->rstart;
5015     cmap  = a->garray;
5016     nzA   = a->A->cmap->n;
5017     nzB   = a->B->cmap->n;
5018     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5019     ncols = 0;
5020     for (i=0; i<nzB; i++) {
5021       if (cmap[i] < start) idx[ncols++] = cmap[i];
5022       else break;
5023     }
5024     imark = i;
5025     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5026     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5027     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5028   } else {
5029     iscola = *col;
5030   }
5031   if (scall != MAT_INITIAL_MATRIX){
5032     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5033     aloc[0] = *A_loc;
5034   }
5035   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5036   *A_loc = aloc[0];
5037   ierr = PetscFree(aloc);CHKERRQ(ierr);
5038   if (!row){
5039     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5040   }
5041   if (!col){
5042     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5043   }
5044   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5045   PetscFunctionReturn(0);
5046 }
5047 
5048 #undef __FUNCT__
5049 #define __FUNCT__ "MatGetBrowsOfAcols"
5050 /*@C
5051     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5052 
5053     Collective on Mat
5054 
5055    Input Parameters:
5056 +    A,B - the matrices in mpiaij format
5057 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5058 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5059 
5060    Output Parameter:
5061 +    rowb, colb - index sets of rows and columns of B to extract
5062 -    B_seq - the sequential matrix generated
5063 
5064     Level: developer
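
   A usage sketch (error checking omitted); A and B are assumed to be assembled MPIAIJ
   matrices with compatible local sizes, and the index sets created by the first call
   must be passed back unchanged for MAT_REUSE_MATRIX:
.vb
      IS  rowb,colb;
      Mat B_seq;
      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve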
5065 
5066 @*/
5067 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5068 {
5069   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5070   PetscErrorCode    ierr;
5071   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5072   IS                isrowb,iscolb;
5073   Mat               *bseq=PETSC_NULL;
5074 
5075   PetscFunctionBegin;
5076   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5077     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5078   }
5079   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5080 
5081   if (scall == MAT_INITIAL_MATRIX){
5082     start = A->cmap->rstart;
5083     cmap  = a->garray;
5084     nzA   = a->A->cmap->n;
5085     nzB   = a->B->cmap->n;
5086     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5087     ncols = 0;
5088     for (i=0; i<nzB; i++) {  /* row < local row index */
5089       if (cmap[i] < start) idx[ncols++] = cmap[i];
5090       else break;
5091     }
5092     imark = i;
5093     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5094     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5095     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5096     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5097   } else {
5098     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5099     isrowb = *rowb; iscolb = *colb;
5100     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5101     bseq[0] = *B_seq;
5102   }
5103   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5104   *B_seq = bseq[0];
5105   ierr = PetscFree(bseq);CHKERRQ(ierr);
5106   if (!rowb){
5107     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5108   } else {
5109     *rowb = isrowb;
5110   }
5111   if (!colb){
5112     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5113   } else {
5114     *colb = iscolb;
5115   }
5116   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5117   PetscFunctionReturn(0);
5118 }
5119 
5120 #undef __FUNCT__
5121 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5122 /*
5123     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5124     of the OFF-DIAGONAL portion of local A
5125 
5126     Collective on Mat
5127 
5128    Input Parameters:
5129 +    A,B - the matrices in mpiaij format
5130 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5131 
5132    Output Parameter:
5133 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5134 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5135 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5136 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5137 
5138     Level: developer
5139 
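
   A usage sketch (error checking omitted); A and B are assumed to be assembled MPIAIJ
   matrices, and the arrays returned through startsj_s, startsj_r and bufa_ptr by the
   first call must be passed back unchanged for MAT_REUSE_MATRIX:

      Mat       B_oth;
      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);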
5140 */
5141 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5142 {
5143   VecScatter_MPI_General *gen_to,*gen_from;
5144   PetscErrorCode         ierr;
5145   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5146   Mat_SeqAIJ             *b_oth;
5147   VecScatter             ctx=a->Mvctx;
5148   MPI_Comm               comm=((PetscObject)ctx)->comm;
5149   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5150   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5151   PetscScalar            *rvalues,*svalues;
5152   MatScalar              *b_otha,*bufa,*bufA;
5153   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5154   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5155   MPI_Status             *sstatus,rstatus;
5156   PetscMPIInt            jj;
5157   PetscInt               *cols,sbs,rbs;
5158   PetscScalar            *vals;
5159 
5160   PetscFunctionBegin;
5161   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5162     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5163   }
5164   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5165   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5166 
5167   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5168   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5169   rvalues  = gen_from->values; /* holds the length of receiving row */
5170   svalues  = gen_to->values;   /* holds the length of sending row */
5171   nrecvs   = gen_from->n;
5172   nsends   = gen_to->n;
5173 
5174   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5175   srow     = gen_to->indices;   /* local row index to be sent */
5176   sstarts  = gen_to->starts;
5177   sprocs   = gen_to->procs;
5178   sstatus  = gen_to->sstatus;
5179   sbs      = gen_to->bs;
5180   rstarts  = gen_from->starts;
5181   rprocs   = gen_from->procs;
5182   rbs      = gen_from->bs;
5183 
5184   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5185   if (scall == MAT_INITIAL_MATRIX){
5186     /* i-array */
5187     /*---------*/
5188     /*  post receives */
5189     for (i=0; i<nrecvs; i++){
5190       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5191       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5192       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5193     }
5194 
5195     /* pack the outgoing message */
5196     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5197     sstartsj[0] = 0;  rstartsj[0] = 0;
5198     len = 0; /* total length of j or a array to be sent */
5199     k = 0;
5200     for (i=0; i<nsends; i++){
5201       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5202       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5203       for (j=0; j<nrows; j++) {
5204         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5205         for (l=0; l<sbs; l++){
5206           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5207           rowlen[j*sbs+l] = ncols;
5208           len += ncols;
5209           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5210         }
5211         k++;
5212       }
5213       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5214       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5215     }
5216     /* recvs and sends of i-array are completed */
5217     i = nrecvs;
5218     while (i--) {
5219       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5220     }
5221     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5222 
5223     /* allocate buffers for sending j and a arrays */
5224     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5225     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5226 
5227     /* create i-array of B_oth */
5228     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5229     b_othi[0] = 0;
5230     len = 0; /* total length of j or a array to be received */
5231     k = 0;
5232     for (i=0; i<nrecvs; i++){
5233       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5234       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5235       for (j=0; j<nrows; j++) {
5236         b_othi[k+1] = b_othi[k] + rowlen[j];
5237         len += rowlen[j]; k++;
5238       }
5239       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5240     }
5241 
5242     /* allocate space for j and a arrays of B_oth */
5243     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5244     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5245 
5246     /* j-array */
5247     /*---------*/
5248     /*  post receives of j-array */
5249     for (i=0; i<nrecvs; i++){
5250       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5251       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5252     }
5253 
5254     /* pack the outgoing message j-array */
5255     k = 0;
5256     for (i=0; i<nsends; i++){
5257       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5258       bufJ = bufj+sstartsj[i];
5259       for (j=0; j<nrows; j++) {
5260         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5261         for (ll=0; ll<sbs; ll++){
5262           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5263           for (l=0; l<ncols; l++){
5264             *bufJ++ = cols[l];
5265           }
5266           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5267         }
5268       }
5269       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5270     }
5271 
5272     /* recvs and sends of j-array are completed */
5273     i = nrecvs;
5274     while (i--) {
5275       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5276     }
5277     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5278   } else if (scall == MAT_REUSE_MATRIX){
5279     sstartsj = *startsj_s;
5280     rstartsj = *startsj_r;
5281     bufa     = *bufa_ptr;
5282     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5283     b_otha   = b_oth->a;
5284   } else {
5285     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5286   }
5287 
5288   /* a-array */
5289   /*---------*/
5290   /*  post receives of a-array */
5291   for (i=0; i<nrecvs; i++){
5292     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5293     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5294   }
5295 
5296   /* pack the outgoing message a-array */
5297   k = 0;
5298   for (i=0; i<nsends; i++){
5299     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5300     bufA = bufa+sstartsj[i];
5301     for (j=0; j<nrows; j++) {
5302       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5303       for (ll=0; ll<sbs; ll++){
5304         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5305         for (l=0; l<ncols; l++){
5306           *bufA++ = vals[l];
5307         }
5308         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5309       }
5310     }
5311     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5312   }
5313   /* recvs and sends of a-array are completed */
5314   i = nrecvs;
5315   while (i--) {
5316     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5317   }
5318   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5319   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5320 
5321   if (scall == MAT_INITIAL_MATRIX){
5322     /* put together the new matrix */
5323     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5324 
5325     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5326     /* Since these are PETSc arrays, change flags to free them as necessary. */
5327     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5328     b_oth->free_a  = PETSC_TRUE;
5329     b_oth->free_ij = PETSC_TRUE;
5330     b_oth->nonew   = 0;
5331 
5332     ierr = PetscFree(bufj);CHKERRQ(ierr);
5333     if (!startsj_s || !bufa_ptr){
5334       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5335       ierr = PetscFree(bufa);CHKERRQ(ierr);  /* the value buffer is not returned to the caller, so free it here */
5336     } else {
5337       *startsj_s = sstartsj;
5338       *startsj_r = rstartsj;
5339       *bufa_ptr  = bufa;
5340     }
5341   }
5342   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5343   PetscFunctionReturn(0);
5344 }
5345 
5346 #undef __FUNCT__
5347 #define __FUNCT__ "MatGetCommunicationStructs"
5348 /*@C
5349   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5350 
5351   Not Collective
5352 
5353   Input Parameter:
5354 . A - The matrix of type MATMPIAIJ
5355 
5356   Output Parameters:
5357 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5358 . colmap - A map from global column index to local index into lvec
5359 - multScatter - A scatter from the argument of a matrix-vector product to lvec
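
  Sample usage (a sketch; assumes A is an assembled matrix of type MATMPIAIJ):
.vb
     Vec        lvec;
     VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve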
5360 
5361   Level: developer
5362 
5363 @*/
5364 #if defined (PETSC_USE_CTABLE)
5365 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5366 #else
5367 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5368 #endif
5369 {
5370   Mat_MPIAIJ *a;
5371 
5372   PetscFunctionBegin;
5373   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5374   PetscValidPointer(lvec, 2);
5375   PetscValidPointer(colmap, 3);
5376   PetscValidPointer(multScatter, 4);
5377   a = (Mat_MPIAIJ *) A->data;
5378   if (lvec) *lvec = a->lvec;
5379   if (colmap) *colmap = a->colmap;
5380   if (multScatter) *multScatter = a->Mvctx;
5381   PetscFunctionReturn(0);
5382 }
5383 
5384 EXTERN_C_BEGIN
5385 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5386 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5387 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5388 EXTERN_C_END
5389 
5390 #undef __FUNCT__
5391 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5392 /*
5393     Computes C = (B'*A')' since no routine is available to form the MPIDense*MPIAIJ product A*B directly
5394 
5395                n                       p                          p
5396         (              )       (              )         (                  )
5397       m (      A       )  *  n (       B      )   =   m (         C        )
5398         (              )       (              )         (                  )
5399 
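    i.e. At = A', Bt = B', then Ct = Bt*At = (A*B)' is formed with the available
    MPIAIJ-times-MPIDense multiply, and finally C = Ct'.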
5400 */
5401 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5402 {
5403   PetscErrorCode     ierr;
5404   Mat                At,Bt,Ct;
5405 
5406   PetscFunctionBegin;
5407   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5408   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5409   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5410   ierr = MatDestroy(&At);CHKERRQ(ierr);
5411   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5412   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5413   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5414   PetscFunctionReturn(0);
5415 }
5416 
5417 #undef __FUNCT__
5418 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5419 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5420 {
5421   PetscErrorCode ierr;
5422   PetscInt       m=A->rmap->n,n=B->cmap->n;
5423   Mat            Cmat;
5424 
5425   PetscFunctionBegin;
5426   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5427   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5428   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5429   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5430   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5431   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5432   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5433   *C   = Cmat;
5434   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5435   PetscFunctionReturn(0);
5436 }
5437 
5438 /* ----------------------------------------------------------------*/
5439 #undef __FUNCT__
5440 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5441 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5442 {
5443   PetscErrorCode ierr;
5444 
5445   PetscFunctionBegin;
5446   if (scall == MAT_INITIAL_MATRIX){
5447     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5448   }
5449   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5450   PetscFunctionReturn(0);
5451 }
5452 
5453 EXTERN_C_BEGIN
5454 #if defined(PETSC_HAVE_MUMPS)
5455 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5456 #endif
5457 #if defined(PETSC_HAVE_PASTIX)
5458 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5459 #endif
5460 #if defined(PETSC_HAVE_SUPERLU_DIST)
5461 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5462 #endif
5463 #if defined(PETSC_HAVE_SPOOLES)
5464 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5465 #endif
5466 EXTERN_C_END
5467 
5468 /*MC
5469    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5470 
5471    Options Database Keys:
5472 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
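
   A typical construction sequence (a minimal sketch; d_nz and o_nz are placeholder
   preallocation counts to be chosen for the problem at hand):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve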
5473 
5474   Level: beginner
5475 
5476 .seealso: MatCreateAIJ()
5477 M*/
5478 
5479 EXTERN_C_BEGIN
5480 #undef __FUNCT__
5481 #define __FUNCT__ "MatCreate_MPIAIJ"
5482 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5483 {
5484   Mat_MPIAIJ     *b;
5485   PetscErrorCode ierr;
5486   PetscMPIInt    size;
5487 
5488   PetscFunctionBegin;
5489   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5490 
5491   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5492   B->data         = (void*)b;
5493   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5494   B->assembled    = PETSC_FALSE;
5495 
5496   B->insertmode   = NOT_SET_VALUES;
5497   b->size         = size;
5498   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5499 
5500   /* build cache for off-processor entries formed during assembly */
5501   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5502   b->donotstash  = PETSC_FALSE;
5503   b->colmap      = 0;
5504   b->garray      = 0;
5505   b->roworiented = PETSC_TRUE;
5506 
5507   /* stuff used for matrix vector multiply */
5508   b->lvec      = PETSC_NULL;
5509   b->Mvctx     = PETSC_NULL;
5510 
5511   /* stuff for MatGetRow() */
5512   b->rowindices   = 0;
5513   b->rowvalues    = 0;
5514   b->getrowactive = PETSC_FALSE;
5515 
5516 #if defined(PETSC_HAVE_SPOOLES)
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5518                                      "MatGetFactor_mpiaij_spooles",
5519                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5520 #endif
5521 #if defined(PETSC_HAVE_MUMPS)
5522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5523                                      "MatGetFactor_aij_mumps",
5524                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5525 #endif
5526 #if defined(PETSC_HAVE_PASTIX)
5527   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5528 					   "MatGetFactor_mpiaij_pastix",
5529 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5530 #endif
5531 #if defined(PETSC_HAVE_SUPERLU_DIST)
5532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5533                                      "MatGetFactor_mpiaij_superlu_dist",
5534                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5535 #endif
5536   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5537                                      "MatStoreValues_MPIAIJ",
5538                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5539   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5540                                      "MatRetrieveValues_MPIAIJ",
5541                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5543 				     "MatGetDiagonalBlock_MPIAIJ",
5544                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5546 				     "MatIsTranspose_MPIAIJ",
5547 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5549 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5550 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5552 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5553 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5555 				     "MatDiagonalScaleLocal_MPIAIJ",
5556 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5558                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5559                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5561                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5562                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5563   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5564                                      "MatConvert_MPIAIJ_MPISBAIJ",
5565                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5566   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5567                                      "MatMatMult_MPIDense_MPIAIJ",
5568                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5569   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5570                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5571                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5573                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5574                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5575   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5576   PetscFunctionReturn(0);
5577 }
5578 EXTERN_C_END
5579 
5580 #undef __FUNCT__
5581 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5582 /*@
5583      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5584          and "off-diagonal" part of the matrix in CSR format.
5585 
5586    Collective on MPI_Comm
5587 
5588    Input Parameters:
5589 +  comm - MPI communicator
5590 .  m - number of local rows (Cannot be PETSC_DECIDE)
5591 .  n - This value should be the same as the local size used in creating the
5592        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5593        calculated if N is given). For square matrices n is almost always m.
5594 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5595 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5596 .   i - row indices for "diagonal" portion of matrix
5597 .   j - column indices for "diagonal" portion of matrix
5598 .   a - matrix values for "diagonal" portion of matrix
5599 .   oi - row indices for "off-diagonal" portion of matrix
5600 .   oj - column indices for "off-diagonal" portion of matrix
5601 -   oa - matrix values for "off-diagonal" portion of matrix
5602 
5603    Output Parameter:
5604 .   mat - the matrix
5605 
5606    Level: advanced
5607 
5608    Notes:
5609        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5610        must free the arrays once the matrix has been destroyed and not before.
5611 
5612        The i, j, oi, and oj indices are 0-based
5613 
5614        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5615 
5616        This sets local rows and cannot be used to set off-processor values.
5617 
5618        You cannot later use MatSetValues() to change values in this matrix.
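
       Example of the calling sequence (a sketch; the six CSR arrays are assumed to have been
       built by the caller and, as noted above, must remain valid until the matrix is destroyed):
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       /* ... use A ... */
       ierr = MatDestroy(&A);CHKERRQ(ierr);
       /* only now may the caller free i, j, a, oi, oj, and oa */
.ve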
5619 
5620 .keywords: matrix, aij, compressed row, sparse, parallel
5621 
5622 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5623           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5624 @*/
5625 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5626 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5627 {
5628   PetscErrorCode ierr;
5629   Mat_MPIAIJ     *maij;
5630 
5631   PetscFunctionBegin;
5632   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5633   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5634   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5635   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5636   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5637   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5638   maij = (Mat_MPIAIJ*) (*mat)->data;
5639   maij->donotstash     = PETSC_TRUE;
5640   (*mat)->preallocated = PETSC_TRUE;
5641 
5642   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5643   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5644 
5645   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5646   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5647 
5648   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5649   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5650   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5651   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5652 
5653   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5654   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5655   PetscFunctionReturn(0);
5656 }
5657 
5658 /*
5659     Special version for direct calls from Fortran
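    This allows MatSetValues() calls made from Fortran on MPIAIJ matrices to reach this
    routine directly, avoiding the usual dispatch and error-handling overhead. Because the
    routine returns void, the error macros are redefined below to abort rather than return
    an error code.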
5660 */
5661 #include <petsc-private/fortranimpl.h>
5662 
5663 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5664 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5665 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5666 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5667 #endif
5668 
5669 /* Change these macros so they can be used in a void function */
5670 #undef CHKERRQ
5671 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5672 #undef SETERRQ2
5673 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5674 #undef SETERRQ3
5675 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5676 #undef SETERRQ
5677 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5678 
5679 EXTERN_C_BEGIN
5680 #undef __FUNCT__
5681 #define __FUNCT__ "matsetvaluesmpiaij_"
5682 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5683 {
5684   Mat             mat = *mmat;
5685   PetscInt        m = *mm, n = *mn;
5686   InsertMode      addv = *maddv;
5687   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5688   PetscScalar     value;
5689   PetscErrorCode  ierr;
5690 
5691   MatCheckPreallocated(mat,1);
5692   if (mat->insertmode == NOT_SET_VALUES) {
5693     mat->insertmode = addv;
5694   }
5695 #if defined(PETSC_USE_DEBUG)
5696   else if (mat->insertmode != addv) {
5697     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5698   }
5699 #endif
5700   {
5701   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5702   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5703   PetscBool       roworiented = aij->roworiented;
5704 
5705   /* Some Variables required in the macro */
5706   Mat             A = aij->A;
5707   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5708   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5709   MatScalar       *aa = a->a;
5710   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5711   Mat             B = aij->B;
5712   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5713   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5714   MatScalar       *ba = b->a;
5715 
5716   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5717   PetscInt        nonew = a->nonew;
5718   MatScalar       *ap1,*ap2;
5719 
5720   PetscFunctionBegin;
5721   for (i=0; i<m; i++) {
5722     if (im[i] < 0) continue;
5723 #if defined(PETSC_USE_DEBUG)
5724     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5725 #endif
5726     if (im[i] >= rstart && im[i] < rend) {
5727       row      = im[i] - rstart;
5728       lastcol1 = -1;
5729       rp1      = aj + ai[row];
5730       ap1      = aa + ai[row];
5731       rmax1    = aimax[row];
5732       nrow1    = ailen[row];
5733       low1     = 0;
5734       high1    = nrow1;
5735       lastcol2 = -1;
5736       rp2      = bj + bi[row];
5737       ap2      = ba + bi[row];
5738       rmax2    = bimax[row];
5739       nrow2    = bilen[row];
5740       low2     = 0;
5741       high2    = nrow2;
5742 
5743       for (j=0; j<n; j++) {
5744         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5745         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
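        /* a column owned by this process goes into the diagonal block A; any other column
           goes into the off-diagonal block B */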
5746         if (in[j] >= cstart && in[j] < cend){
5747           col = in[j] - cstart;
5748           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5749         } else if (in[j] < 0) continue;
5750 #if defined(PETSC_USE_DEBUG)
5751         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5752 #endif
5753         else {
5754           if (mat->was_assembled) {
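            /* the matrix was assembled before, so B stores columns in a compacted local
               numbering; translate the global column index through colmap */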
5755             if (!aij->colmap) {
5756               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5757             }
5758 #if defined (PETSC_USE_CTABLE)
5759             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5760 	    col--;
5761 #else
5762             col = aij->colmap[in[j]] - 1;
5763 #endif
5764             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5765               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5766               col =  in[j];
5767               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5768               B = aij->B;
5769               b = (Mat_SeqAIJ*)B->data;
5770               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; /* refresh ba here since ap2 below uses it */
5771               rp2      = bj + bi[row];
5772               ap2      = ba + bi[row];
5773               rmax2    = bimax[row];
5774               nrow2    = bilen[row];
5775               low2     = 0;
5776               high2    = nrow2;
5777               bm       = aij->B->rmap->n;
5778               ba = b->a;
5779             }
5780           } else col = in[j];
5781           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5782         }
5783       }
5784     } else {
5785       if (!aij->donotstash) {
5786         if (roworiented) {
5787           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5788         } else {
5789           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5790         }
5791       }
5792     }
5793   }}
5794   PetscFunctionReturnVoid();
5795 }
5796 EXTERN_C_END
5797 
5798