xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 08ea439d8b494dc5b67b6c396f980d76734849b1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to using inodes when
18    enough consecutive rows share the same nonzero structure.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
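/*
   A minimal usage sketch, not taken from this file: comm, n, d_nz and o_nz are illustrative
   placeholders.  It follows the recommendation above of calling both preallocation routines so
   that the same code works on one process or many:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
     ...  set entries with MatSetValues()  ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/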
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
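/*
   A small sketch of selecting this format from the options database (illustrative; comm and n are
   placeholders).  After MatSetFromOptions() the usual preallocation and assembly calls follow:

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);      then run with -mat_type aijcrl
*/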
41 
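/*
   Builds, in keptrows, an index set (global row numbers) of the locally owned rows that contain at
   least one explicitly stored nonzero value in either the diagonal (A) or the off-diagonal (B) block.
   If no process owns a completely zero row, keptrows is left NULL.
*/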
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
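/*
   Computes the requested norm (NORM_1, NORM_2 or NORM_INFINITY) of every global column: each process
   accumulates the contribution of its diagonal (A) and off-diagonal (B) blocks into a work array of
   length N and the results are combined with MPI_Allreduce, so every process receives all N column
   norms.  A caller-side sketch (illustrative; norms must have room for the global number of columns):

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/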
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the number of diagonal and off-diagonal entries in each row */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the number of diagonal and off-diagonal entries in each row */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
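/*
   A caller-side sketch for the routine above (illustrative; gmat is a MATSEQAIJ matrix whose entries
   are read only on rank 0, m is the number of rows this process is to own, and the local m values
   must sum to the global size of gmat):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);   update the values only
*/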
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it it is not scalable (each process
331 holds an order-N integer array), but access is fast.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
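/*
   A sketch of how the column map built above is consulted; this is the pattern used in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below (gcol is a global column index, lcol the
   resulting local column index in aij->B, negative if gcol is not present):

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/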
356 
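/*
   The two macros below insert (or, for ADD_VALUES, accumulate) a single value at (row,col) of the
   local diagonal block A and the off-diagonal block B respectively.  Each narrows the position with a
   short binary search over the row, then scans linearly; if the column is not yet present and new
   nonzeros are allowed, MatSeqXAIJReallocateAIJ() makes room and the later entries of the row are
   shifted up by one slot.
*/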
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
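/*
   Replaces an entire locally owned row (given by its global index) with the values in v.  v must list
   the values in global column order and match the existing nonzero pattern exactly: first the
   off-diagonal entries left of the diagonal block, then the diagonal-block entries, then the remaining
   off-diagonal entries.
*/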
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
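/*
   Inserts or adds a logically dense block of values using global row and column indices.  Rows owned
   by this process are handled immediately: entries whose column lies in [cstart,cend) go into the
   diagonal block A, all other entries go into the off-diagonal block B (disassembling B if a brand new
   off-diagonal column appears after the matrix was already assembled).  Rows owned by other processes
   are placed in the stash and communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/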
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
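/*
   Retrieves values using global indices.  Only rows owned by the calling process may be queried;
   columns may be anywhere.  Off-diagonal-block columns are located through the colmap built by
   MatCreateColmap_MPIAIJ_Private(); entries that are not stored are returned as 0.0.
*/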
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselves, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
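/*
   Zeros the given global rows of the matrix; the rows may be owned by any process, so the indices are
   first routed to their owners with point-to-point messages.  If diag is nonzero the diagonal entry of
   each zeroed row is set to diag, and when both x and b are provided the right hand side is fixed so
   that the zeroed rows enforce x, i.e. b[row] = diag*x[row].  A caller-side sketch (illustrative; rows
   holds nrows global row indices):

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/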
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal entry, which would allocate
843      new memory each time, triggering many mallocs and slowing things down.
844 
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141     /* added into yy until the next line */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195    PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
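/*
   Writes the matrix to a binary viewer in the standard PETSc format: a header (MAT_FILE_CLASSID,
   global rows, global columns, total number of nonzeros), then the row lengths, then the global column
   indices of every row, then the numerical values.  Process 0 performs all the file I/O; the other
   processes ship their pieces to it under the viewer's flow control so that only a bounded number of
   messages is in flight at any time.
*/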
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   if (!rank) {
1308     header[0] = MAT_FILE_CLASSID;
1309     header[1] = mat->rmap->N;
1310     header[2] = mat->cmap->N;
1311     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1312     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1313     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1314     /* get largest number of rows any processor has */
1315     rlen = mat->rmap->n;
1316     range = mat->rmap->range;
1317     for (i=1; i<size; i++) {
1318       rlen = PetscMax(rlen,range[i+1] - range[i]);
1319     }
1320   } else {
1321     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1322     rlen = mat->rmap->n;
1323   }
1324 
1325   /* load up the local row counts */
1326   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1327   for (i=0; i<mat->rmap->n; i++) {
1328     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1329   }
1330 
1331   /* store the row lengths to the file */
1332   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1333   if (!rank) {
1334     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     for (i=1; i<size; i++) {
1336       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1337       rlen = range[i+1] - range[i];
1338       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1340     }
1341     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1342   } else {
1343     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1344     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1346   }
1347   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1348 
1349   /* load up the local column indices */
1350   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
1351   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1352   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1353   cnt  = 0;
1354   for (i=0; i<mat->rmap->n; i++) {
1355     for (j=B->i[i]; j<B->i[i+1]; j++) {
1356       if ( (col = garray[B->j[j]]) > cstart) break;
1357       column_indices[cnt++] = col;
1358     }
1359     for (k=A->i[i]; k<A->i[i+1]; k++) {
1360       column_indices[cnt++] = A->j[k] + cstart;
1361     }
1362     for (; j<B->i[i+1]; j++) {
1363       column_indices[cnt++] = garray[B->j[j]];
1364     }
1365   }
1366   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1367 
1368   /* store the column indices to the file */
1369   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1370   if (!rank) {
1371     MPI_Status status;
1372     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1373     for (i=1; i<size; i++) {
1374       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1375       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1376       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1377       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1383     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1386   }
1387   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1388 
1389   /* load up the local column values */
1390   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1391   cnt  = 0;
1392   for (i=0; i<mat->rmap->n; i++) {
1393     for (j=B->i[i]; j<B->i[i+1]; j++) {
1394       if ( garray[B->j[j]] > cstart) break;
1395       column_values[cnt++] = B->a[j];
1396     }
1397     for (k=A->i[i]; k<A->i[i+1]; k++) {
1398       column_values[cnt++] = A->a[k];
1399     }
1400     for (; j<B->i[i+1]; j++) {
1401       column_values[cnt++] = B->a[j];
1402     }
1403   }
1404   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1405 
1406   /* store the column values to the file */
1407   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1408   if (!rank) {
1409     MPI_Status status;
1410     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1411     for (i=1; i<size; i++) {
1412       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1413       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1414       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1415       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1416       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1417     }
1418     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1419   } else {
1420     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1421     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1424   }
1425   ierr = PetscFree(column_values);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
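     /*
        Viewer-dependent display of the matrix.  The ASCII "info" formats print
        per-process nonzero statistics and return early; the binary case calls
        MatView_MPIAIJ_Binary() (or views aij->A directly on one process).  For the
        remaining cases the whole matrix is assembled onto process 0 as a temporary
        MATMPIAIJ and viewed through the singleton viewer.
     */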
1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1432 {
1433   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1434   PetscErrorCode    ierr;
1435   PetscMPIInt       rank = aij->rank,size = aij->size;
1436   PetscBool         isdraw,iascii,isbinary;
1437   PetscViewer       sviewer;
1438   PetscViewerFormat format;
1439 
1440   PetscFunctionBegin;
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1443   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1444   if (iascii) {
1445     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1446     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1447       MatInfo    info;
1448       PetscInt   *inodes;
1449 
1450       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1451       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1452       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1454       if (!inodes) {
1455         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1456 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1457       } else {
1458         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1459 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1460       }
1461       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1463       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1465       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1468       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1471       PetscInt   inodecount,inodelimit,*inodes;
1472       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1473       if (inodes) {
1474         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1475       } else {
1476         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1477       }
1478       PetscFunctionReturn(0);
1479     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1480       PetscFunctionReturn(0);
1481     }
1482   } else if (isbinary) {
1483     if (size == 1) {
1484       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1485       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1486     } else {
1487       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1488     }
1489     PetscFunctionReturn(0);
1490   } else if (isdraw) {
1491     PetscDraw  draw;
1492     PetscBool  isnull;
1493     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1494     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1495   }
1496 
1497   if (size == 1) {
1498     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1499     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1500   } else {
1501     /* assemble the entire matrix onto first processor. */
1502     Mat         A;
1503     Mat_SeqAIJ  *Aloc;
1504     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505     MatScalar   *a;
1506 
1507     if (mat->rmap->N > 1024) {
1508       PetscBool  flg = PETSC_FALSE;
1509 
1510       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1511       if (!flg) {
1512         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513       }
1514     }
1515 
1516     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1517     if (!rank) {
1518       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1519     } else {
1520       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1521     }
1522     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1524     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1525     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1526     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1527 
1528     /* copy over the A part */
1529     Aloc = (Mat_SeqAIJ*)aij->A->data;
1530     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531     row = mat->rmap->rstart;
1532     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1533     for (i=0; i<m; i++) {
1534       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1535       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536     }
1537     aj = Aloc->j;
1538     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1539 
1540     /* copy over the B part */
1541     Aloc = (Mat_SeqAIJ*)aij->B->data;
1542     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543     row  = mat->rmap->rstart;
1544     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1545     ct   = cols;
1546     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547     for (i=0; i<m; i++) {
1548       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1549       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550     }
1551     ierr = PetscFree(ct);CHKERRQ(ierr);
1552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554     /*
1555        Everyone has to call to draw the matrix since the graphics waits are
1556        synchronized across all processors that share the PetscDraw object
1557     */
1558     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559     if (!rank) {
1560       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1561       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1563       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1564     }
1565     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     ierr = MatDestroy(&A);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatView_MPIAIJ"
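     /* Dispatches to MatView_MPIAIJ_ASCIIorDraworSocket() for the viewer types supported by MPIAIJ matrices. */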
1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574 {
1575   PetscErrorCode ierr;
1576   PetscBool      iascii,isdraw,issocket,isbinary;
1577 
1578   PetscFunctionBegin;
1579   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1580   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1581   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1582   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1583   if (iascii || isdraw || isbinary || issocket) {
1584     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1585   } else {
1586     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSOR_MPIAIJ"
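     /*
        Only the "local" SOR variants (and Eisenstat) are supported in parallel.
        Each outer iteration scatters the current solution into mat->lvec, forms the
        modified right-hand side bb1 = bb - B*x (the off-diagonal contribution), and
        then runs the requested SOR sweep of the local diagonal block A on bb1.
     */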
1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1594 {
1595   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1596   PetscErrorCode ierr;
1597   Vec            bb1 = 0;
1598   PetscBool      hasop;
1599 
1600   PetscFunctionBegin;
1601   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1602     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1603   }
1604 
1605   if (flag == SOR_APPLY_UPPER) {
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607     PetscFunctionReturn(0);
1608   }
1609 
1610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1611     if (flag & SOR_ZERO_INITIAL_GUESS) {
1612       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1613       its--;
1614     }
1615 
1616     while (its--) {
1617       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1619 
1620       /* update rhs: bb1 = bb - B*x */
1621       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1622       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1623 
1624       /* local sweep */
1625       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1626     }
1627   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1628     if (flag & SOR_ZERO_INITIAL_GUESS) {
1629       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1630       its--;
1631     }
1632     while (its--) {
1633       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1635 
1636       /* update rhs: bb1 = bb - B*x */
1637       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1638       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1639 
1640       /* local sweep */
1641       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1642     }
1643   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1644     if (flag & SOR_ZERO_INITIAL_GUESS) {
1645       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1646       its--;
1647     }
1648     while (its--) {
1649       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1651 
1652       /* update rhs: bb1 = bb - B*x */
1653       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1654       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1655 
1656       /* local sweep */
1657       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1658     }
1659   }  else if (flag & SOR_EISENSTAT) {
1660     Vec         xx1;
1661 
1662     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1663     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1664 
1665     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667     if (!mat->diag) {
1668       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1669       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1670     }
1671     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1672     if (hasop) {
1673       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1674     } else {
1675       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1676     }
1677     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1678 
1679     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1680 
1681     /* local sweep */
1682     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1683     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1684     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1685   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1686 
1687   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatPermute_MPIAIJ"
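     /*
        Applies row and column permutations by gathering the (possibly serial) row
        permutation onto the whole communicator, inverting it, extracting the local
        target row indices, inverting the local column permutation, and finally
        calling MatGetSubMatrix_MPIAIJ_Private() with the resulting index sets.
     */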
1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1694 {
1695   MPI_Comm       comm;
1696   PetscInt       first,local_rowsize,local_colsize;
1697   const PetscInt *rows;
1698   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1703   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1704   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1705   /* collect the global row permutation and invert it */
1706   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1707   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1708   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1710   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718   /* the column permutation is so much easier;
1719      make a local version of 'colp' and invert it */
1720   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1721   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1722   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1724   /* now we just get the submatrix */
1725   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1726   /* clean up */
1727   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1728   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735 {
1736   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1737   Mat            A = mat->A,B = mat->B;
1738   PetscErrorCode ierr;
1739   PetscReal      isend[5],irecv[5];
1740 
1741   PetscFunctionBegin;
1742   info->block_size     = 1.0;
1743   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1744   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1745   isend[3] = info->memory;  isend[4] = info->mallocs;
1746   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1747   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1748   isend[3] += info->memory;  isend[4] += info->mallocs;
1749   if (flag == MAT_LOCAL) {
1750     info->nz_used      = isend[0];
1751     info->nz_allocated = isend[1];
1752     info->nz_unneeded  = isend[2];
1753     info->memory       = isend[3];
1754     info->mallocs      = isend[4];
1755   } else if (flag == MAT_GLOBAL_MAX) {
1756     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1757     info->nz_used      = irecv[0];
1758     info->nz_allocated = irecv[1];
1759     info->nz_unneeded  = irecv[2];
1760     info->memory       = irecv[3];
1761     info->mallocs      = irecv[4];
1762   } else if (flag == MAT_GLOBAL_SUM) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   }
1770   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1771   info->fill_ratio_needed = 0;
1772   info->factor_mallocs    = 0;
1773 
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatSetOption_MPIAIJ"
1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1780 {
1781   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1782   PetscErrorCode ierr;
1783 
1784   PetscFunctionBegin;
1785   switch (op) {
1786   case MAT_NEW_NONZERO_LOCATIONS:
1787   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1788   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1789   case MAT_KEEP_NONZERO_PATTERN:
1790   case MAT_NEW_NONZERO_LOCATION_ERR:
1791   case MAT_USE_INODES:
1792   case MAT_IGNORE_ZERO_ENTRIES:
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_ROW_ORIENTED:
1797     a->roworiented = flg;
1798     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1799     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1800     break;
1801   case MAT_NEW_DIAGONALS:
1802     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1803     break;
1804   case MAT_IGNORE_OFF_PROC_ENTRIES:
1805     a->donotstash = flg;
1806     break;
1807   case MAT_SPD:
1808     A->spd_set                         = PETSC_TRUE;
1809     A->spd                             = flg;
1810     if (flg) {
1811       A->symmetric                     = PETSC_TRUE;
1812       A->structurally_symmetric        = PETSC_TRUE;
1813       A->symmetric_set                 = PETSC_TRUE;
1814       A->structurally_symmetric_set    = PETSC_TRUE;
1815     }
1816     break;
1817   case MAT_SYMMETRIC:
1818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_STRUCTURALLY_SYMMETRIC:
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     break;
1823   case MAT_HERMITIAN:
1824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1825     break;
1826   case MAT_SYMMETRY_ETERNAL:
1827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1828     break;
1829   default:
1830     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1831   }
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatGetRow_MPIAIJ"
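     /*
        Returns a locally owned row with global column indices by merging the row of
        the diagonal block A with the row of the off-diagonal block B (mapped through
        garray), keeping the combined entries sorted by increasing global column.
     */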
1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1838 {
1839   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1840   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1841   PetscErrorCode ierr;
1842   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1843   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1844   PetscInt       *cmap,*idx_p;
1845 
1846   PetscFunctionBegin;
1847   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1848   mat->getrowactive = PETSC_TRUE;
1849 
1850   if (!mat->rowvalues && (idx || v)) {
1851     /*
1852         allocate enough space to hold information from the longest row.
1853     */
1854     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1855     PetscInt   max = 1,tmp;
1856     for (i=0; i<matin->rmap->n; i++) {
1857       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1858       if (max < tmp) { max = tmp; }
1859     }
1860     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1861   }
1862 
1863   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1864   lrow = row - rstart;
1865 
1866   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1867   if (!v)   {pvA = 0; pvB = 0;}
1868   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1869   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1870   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1871   nztot = nzA + nzB;
1872 
1873   cmap  = mat->garray;
1874   if (v  || idx) {
1875     if (nztot) {
1876       /* Sort by increasing column numbers, assuming A and B already sorted */
1877       PetscInt imark = -1;
1878       if (v) {
1879         *v = v_p = mat->rowvalues;
1880         for (i=0; i<nzB; i++) {
1881           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1882           else break;
1883         }
1884         imark = i;
1885         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1886         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1887       }
1888       if (idx) {
1889         *idx = idx_p = mat->rowindices;
1890         if (imark > -1) {
1891           for (i=0; i<imark; i++) {
1892             idx_p[i] = cmap[cworkB[i]];
1893           }
1894         } else {
1895           for (i=0; i<nzB; i++) {
1896             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1897             else break;
1898           }
1899           imark = i;
1900         }
1901         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1902         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1903       }
1904     } else {
1905       if (idx) *idx = 0;
1906       if (v)   *v   = 0;
1907     }
1908   }
1909   *nz = nztot;
1910   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1911   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1917 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1918 {
1919   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1920 
1921   PetscFunctionBegin;
1922   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1923   aij->getrowactive = PETSC_FALSE;
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatNorm_MPIAIJ"
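     /*
        Frobenius, one, and infinity norms are formed from the local A and B parts and
        combined with an MPI_Allreduce (sum of squares, per-column sums, and per-row
        sums respectively); the two norm is not supported in parallel.
     */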
1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1930 {
1931   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1932   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1933   PetscErrorCode ierr;
1934   PetscInt       i,j,cstart = mat->cmap->rstart;
1935   PetscReal      sum = 0.0;
1936   MatScalar      *v;
1937 
1938   PetscFunctionBegin;
1939   if (aij->size == 1) {
1940     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1941   } else {
1942     if (type == NORM_FROBENIUS) {
1943       v = amat->a;
1944       for (i=0; i<amat->nz; i++) {
1945 #if defined(PETSC_USE_COMPLEX)
1946         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1947 #else
1948         sum += (*v)*(*v); v++;
1949 #endif
1950       }
1951       v = bmat->a;
1952       for (i=0; i<bmat->nz; i++) {
1953 #if defined(PETSC_USE_COMPLEX)
1954         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1955 #else
1956         sum += (*v)*(*v); v++;
1957 #endif
1958       }
1959       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1960       *norm = PetscSqrtReal(*norm);
1961     } else if (type == NORM_1) { /* max column norm */
1962       PetscReal *tmp,*tmp2;
1963       PetscInt  *jj,*garray = aij->garray;
1964       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1965       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1966       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1967       *norm = 0.0;
1968       v = amat->a; jj = amat->j;
1969       for (j=0; j<amat->nz; j++) {
1970         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1971       }
1972       v = bmat->a; jj = bmat->j;
1973       for (j=0; j<bmat->nz; j++) {
1974         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1975       }
1976       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1977       for (j=0; j<mat->cmap->N; j++) {
1978         if (tmp2[j] > *norm) *norm = tmp2[j];
1979       }
1980       ierr = PetscFree(tmp);CHKERRQ(ierr);
1981       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1982     } else if (type == NORM_INFINITY) { /* max row norm */
1983       PetscReal ntemp = 0.0;
1984       for (j=0; j<aij->A->rmap->n; j++) {
1985         v = amat->a + amat->i[j];
1986         sum = 0.0;
1987         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         v = bmat->a + bmat->i[j];
1991         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         if (sum > ntemp) ntemp = sum;
1995       }
1996       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1997     } else {
1998       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1999     }
2000   }
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatTranspose_MPIAIJ"
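     /*
        Forms the explicit transpose by inserting each local row of A and B as a
        column of the result with MatSetValues().  The d_nnz counts computed from the
        diagonal block are also used as the off-diagonal preallocation estimate, so
        additional mallocs are allowed by turning off MAT_NEW_NONZERO_ALLOCATION_ERR.
     */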
2006 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2007 {
2008   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2009   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2010   PetscErrorCode ierr;
2011   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2012   PetscInt       cstart=A->cmap->rstart,ncol;
2013   Mat            B;
2014   MatScalar      *array;
2015 
2016   PetscFunctionBegin;
2017   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2018 
2019   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2020   ai = Aloc->i; aj = Aloc->j;
2021   bi = Bloc->i; bj = Bloc->j;
2022   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2023     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2024     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2025     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2026     for (i=0; i<ai[ma]; i++){
2027       d_nnz[aj[i]] ++;
2028       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2029     }
2030 
2031     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2032     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2033     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2034     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2035     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2036     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2037     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2038   } else {
2039     B = *matout;
2040   }
2041 
2042   /* copy over the A part */
2043   array = Aloc->a;
2044   row = A->rmap->rstart;
2045   for (i=0; i<ma; i++) {
2046     ncol = ai[i+1]-ai[i];
2047     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2048     row++; array += ncol; aj += ncol;
2049   }
2050   aj = Aloc->j;
2051   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2052 
2053   /* copy over the B part */
2054   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2055   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2056   array = Bloc->a;
2057   row = A->rmap->rstart;
2058   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2059   cols_tmp = cols;
2060   for (i=0; i<mb; i++) {
2061     ncol = bi[i+1]-bi[i];
2062     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2063     row++; array += ncol; cols_tmp += ncol;
2064   }
2065   ierr = PetscFree(cols);CHKERRQ(ierr);
2066 
2067   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2068   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2070     *matout = B;
2071   } else {
2072     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2073   }
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
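     /*
        Computes diag(ll)*mat*diag(rr).  The scatter of rr into aij->lvec is started
        first so that the left scaling of B and the scaling of the diagonal block A
        overlap the communication; the right scaling of B uses the scattered values.
     */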
2079 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2080 {
2081   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2082   Mat            a = aij->A,b = aij->B;
2083   PetscErrorCode ierr;
2084   PetscInt       s1,s2,s3;
2085 
2086   PetscFunctionBegin;
2087   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2088   if (rr) {
2089     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2090     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2091     /* Overlap communication with computation. */
2092     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2093   }
2094   if (ll) {
2095     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2096     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2097     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2098   }
2099   /* scale  the diagonal block */
2100   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2101 
2102   if (rr) {
2103     /* Do a scatter end and then right scale the off-diagonal block */
2104     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2105     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2106   }
2107 
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 #undef __FUNCT__
2112 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2114 {
2115   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatEqual_MPIAIJ"
2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2126 {
2127   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2128   Mat            a,b,c,d;
2129   PetscBool      flg;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   a = matA->A; b = matA->B;
2134   c = matB->A; d = matB->B;
2135 
2136   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2137   if (flg) {
2138     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2139   }
2140   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatCopy_MPIAIJ"
2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2147 {
2148   PetscErrorCode ierr;
2149   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2150   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2151 
2152   PetscFunctionBegin;
2153   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2154   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2155     /* because of the column compression in the off-processor part of the matrix a->B,
2156        the number of columns in a->B and b->B may be different, hence we cannot call
2157        the MatCopy() directly on the two parts. If need be, we can provide a more
2158        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2159        then copying the submatrices */
2160     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2161   } else {
2162     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2163     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatSetUp_MPIAIJ"
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2183 {
2184   PetscInt          i,m=Y->rmap->N;
2185   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2186   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2187   const PetscInt    *xi = x->i,*yi = y->i;
2188 
2189   PetscFunctionBegin;
2190   /* Set the number of nonzeros in the new matrix */
2191   for(i=0; i<m; i++) {
2192     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2193     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2194     nnz[i] = 0;
2195     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2196       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2197       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2198       nnz[i]++;
2199     }
2200     for (; k<nzy; k++) nnz[i]++;
2201   }
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatAXPY_MPIAIJ"
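     /*
        Y = a*X + Y.  With SAME_NONZERO_PATTERN the values of the A and B parts are
        combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN an x-to-y index
        map is built (and cached) for the off-diagonal part; otherwise a new matrix
        with the union nonzero pattern is preallocated and replaces Y.
     */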
2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2208 {
2209   PetscErrorCode ierr;
2210   PetscInt       i;
2211   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2212   PetscBLASInt   bnz,one=1;
2213   Mat_SeqAIJ     *x,*y;
2214 
2215   PetscFunctionBegin;
2216   if (str == SAME_NONZERO_PATTERN) {
2217     PetscScalar alpha = a;
2218     x = (Mat_SeqAIJ *)xx->A->data;
2219     y = (Mat_SeqAIJ *)yy->A->data;
2220     bnz = PetscBLASIntCast(x->nz);
2221     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2222     x = (Mat_SeqAIJ *)xx->B->data;
2223     y = (Mat_SeqAIJ *)yy->B->data;
2224     bnz = PetscBLASIntCast(x->nz);
2225     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2226   } else if (str == SUBSET_NONZERO_PATTERN) {
2227     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2228 
2229     x = (Mat_SeqAIJ *)xx->B->data;
2230     y = (Mat_SeqAIJ *)yy->B->data;
2231     if (y->xtoy && y->XtoY != xx->B) {
2232       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2233       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2234     }
2235     if (!y->xtoy) { /* get xtoy */
2236       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2237       y->XtoY = xx->B;
2238       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2239     }
2240     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2241   } else {
2242     Mat B;
2243     PetscInt *nnz_d,*nnz_o;
2244     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2245     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2246     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2247     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2248     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2249     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2250     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2251     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2253     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2254     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2255     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2256     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2258   }
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatConjugate_MPIAIJ"
2266 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2267 {
2268 #if defined(PETSC_USE_COMPLEX)
2269   PetscErrorCode ierr;
2270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2274   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2275 #else
2276   PetscFunctionBegin;
2277 #endif
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatRealPart_MPIAIJ"
2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2284 {
2285   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2290   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2297 {
2298   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2303   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #ifdef PETSC_HAVE_PBGL
2308 
2309 #include <boost/parallel/mpi/bsp_process_group.hpp>
2310 #include <boost/graph/distributed/ilu_default_graph.hpp>
2311 #include <boost/graph/distributed/ilu_0_block.hpp>
2312 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2313 #include <boost/graph/distributed/petsc/interface.hpp>
2314 #include <boost/multi_array.hpp>
2315 #include <boost/parallel/distributed_property_map.hpp>
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2319 /*
2320   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2321 */
2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2323 {
2324   namespace petsc = boost::distributed::petsc;
2325 
2326   namespace graph_dist = boost::graph::distributed;
2327   using boost::graph::distributed::ilu_default::process_group_type;
2328   using boost::graph::ilu_permuted;
2329 
2330   PetscBool       row_identity, col_identity;
2331   PetscContainer  c;
2332   PetscInt        m, n, M, N;
2333   PetscErrorCode  ierr;
2334 
2335   PetscFunctionBegin;
2336   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2337   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2338   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2339   if (!row_identity || !col_identity) {
2340     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2341   }
2342 
2343   process_group_type pg;
2344   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2345   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2346   lgraph_type&   level_graph = *lgraph_p;
2347   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2348 
2349   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2350   ilu_permuted(level_graph);
2351 
2352   /* put together the new matrix */
2353   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2354   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2355   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2356   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2357   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2358   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2359   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361 
2362   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2363   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2364   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2365   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2372 {
2373   PetscFunctionBegin;
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatSolve_MPIAIJ"
2379 /*
2380   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2381 */
2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2383 {
2384   namespace graph_dist = boost::graph::distributed;
2385 
2386   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2387   lgraph_type*   lgraph_p;
2388   PetscContainer c;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2393   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2394   ierr = VecCopy(b, x);CHKERRQ(ierr);
2395 
2396   PetscScalar* array_x;
2397   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2398   PetscInt sx;
2399   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2400 
2401   PetscScalar* array_b;
2402   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2403   PetscInt sb;
2404   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2405 
2406   lgraph_type&   level_graph = *lgraph_p;
2407   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2408 
2409   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2410   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2411                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2412 
2413   typedef boost::iterator_property_map<array_ref_type::iterator,
2414                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2415   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2416                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2417 
2418   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2419 
2420   PetscFunctionReturn(0);
2421 }
2422 #endif
2423 
2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2425   PetscInt       nzlocal,nsends,nrecvs;
2426   PetscMPIInt    *send_rank,*recv_rank;
2427   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2428   PetscScalar    *sbuf_a,**rbuf_a;
2429   PetscErrorCode (*Destroy)(Mat);
2430 } Mat_Redundant;
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2435 {
2436   PetscErrorCode       ierr;
2437   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2438   PetscInt             i;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2442   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2443   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2444   for (i=0; i<redund->nrecvs; i++){
2445     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2446     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2447   }
2448   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2449   ierr = PetscFree(redund);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatDestroy_MatRedundant"
2455 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2456 {
2457   PetscErrorCode  ierr;
2458   PetscContainer  container;
2459   Mat_Redundant   *redund=PETSC_NULL;
2460 
2461   PetscFunctionBegin;
2462   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2463   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2464   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2465   A->ops->destroy = redund->Destroy;
2466   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2467   if (A->ops->destroy) {
2468     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
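     /*
        Builds a redundant copy of mat on each subcommunicator.  Every process packs
        its local rows (row pointers, global column indices, values) into send
        buffers and exchanges them with the corresponding processes of the other
        subcommunicators; with MAT_REUSE_MATRIX the cached Mat_Redundant buffers are
        reused and only the numerical values are re-sent.
     */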
2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2476 {
2477   PetscMPIInt    rank,size;
2478   MPI_Comm       comm=((PetscObject)mat)->comm;
2479   PetscErrorCode ierr;
2480   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2481   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2482   PetscInt       *rowrange=mat->rmap->range;
2483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2484   Mat            A=aij->A,B=aij->B,C=*matredundant;
2485   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2486   PetscScalar    *sbuf_a;
2487   PetscInt       nzlocal=a->nz+b->nz;
2488   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2489   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2490   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2491   MatScalar      *aworkA,*aworkB;
2492   PetscScalar    *vals;
2493   PetscMPIInt    tag1,tag2,tag3,imdex;
2494   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2495                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2496   MPI_Status     recv_status,*send_status;
2497   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2498   PetscInt       **rbuf_j=PETSC_NULL;
2499   PetscScalar    **rbuf_a=PETSC_NULL;
2500   Mat_Redundant  *redund=PETSC_NULL;
2501   PetscContainer container;
2502 
2503   PetscFunctionBegin;
2504   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2506 
2507   if (reuse == MAT_REUSE_MATRIX) {
2508     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2509     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2510     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2511     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2512     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2513     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2514     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2515     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2516 
2517     nsends    = redund->nsends;
2518     nrecvs    = redund->nrecvs;
2519     send_rank = redund->send_rank;
2520     recv_rank = redund->recv_rank;
2521     sbuf_nz   = redund->sbuf_nz;
2522     rbuf_nz   = redund->rbuf_nz;
2523     sbuf_j    = redund->sbuf_j;
2524     sbuf_a    = redund->sbuf_a;
2525     rbuf_j    = redund->rbuf_j;
2526     rbuf_a    = redund->rbuf_a;
2527   }
2528 
2529   if (reuse == MAT_INITIAL_MATRIX){
2530     PetscMPIInt  subrank,subsize;
2531     PetscInt     nleftover,np_subcomm;
2532     /* get the destination processors' id send_rank, nsends and nrecvs */
2533     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2534     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2535     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2536     np_subcomm = size/nsubcomm;
2537     nleftover  = size - nsubcomm*np_subcomm;
2538     nsends = 0; nrecvs = 0;
2539     for (i=0; i<size; i++){ /* i=rank*/
2540       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2541         send_rank[nsends] = i; nsends++;
2542         recv_rank[nrecvs++] = i;
2543       }
2544     }
2545     if (rank >= size - nleftover){/* this proc is a leftover processor */
2546       i = size-nleftover-1;
2547       j = 0;
2548       while (j < nsubcomm - nleftover){
2549         send_rank[nsends++] = i;
2550         i--; j++;
2551       }
2552     }
2553 
2554     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2555       for (i=0; i<nleftover; i++){
2556         recv_rank[nrecvs++] = size-nleftover+i;
2557       }
2558     }
2559 
2560     /* allocate sbuf_j, sbuf_a */
2561     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2562     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2563     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2564   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2565 
2566   /* copy mat's local entries into the buffers */
2567   if (reuse == MAT_INITIAL_MATRIX){
2568     rownz_max = 0;
2569     rptr = sbuf_j;
2570     cols = sbuf_j + rend-rstart + 1;
2571     vals = sbuf_a;
2572     rptr[0] = 0;
2573     for (i=0; i<rend-rstart; i++){
2574       row = i + rstart;
2575       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2576       ncols  = nzA + nzB;
2577       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2578       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2579       /* load the column indices for this row into cols */
2580       lwrite = 0;
2581       for (l=0; l<nzB; l++) {
2582         if ((ctmp = bmap[cworkB[l]]) < cstart){
2583           vals[lwrite]   = aworkB[l];
2584           cols[lwrite++] = ctmp;
2585         }
2586       }
2587       for (l=0; l<nzA; l++){
2588         vals[lwrite]   = aworkA[l];
2589         cols[lwrite++] = cstart + cworkA[l];
2590       }
2591       for (l=0; l<nzB; l++) {
2592         if ((ctmp = bmap[cworkB[l]]) >= cend){
2593           vals[lwrite]   = aworkB[l];
2594           cols[lwrite++] = ctmp;
2595         }
2596       }
2597       vals += ncols;
2598       cols += ncols;
2599       rptr[i+1] = rptr[i] + ncols;
2600       if (rownz_max < ncols) rownz_max = ncols;
2601     }
2602     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2603   } else { /* only copy matrix values into sbuf_a */
2604     rptr = sbuf_j;
2605     vals = sbuf_a;
2606     rptr[0] = 0;
2607     for (i=0; i<rend-rstart; i++){
2608       row = i + rstart;
2609       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2610       ncols  = nzA + nzB;
2611       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2612       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2613       lwrite = 0;
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2616       }
2617       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2618       for (l=0; l<nzB; l++) {
2619         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2620       }
2621       vals += ncols;
2622       rptr[i+1] = rptr[i] + ncols;
2623     }
2624   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2625 
2626   /* send nzlocal to others, and recv other's nzlocal */
2627   /*--------------------------------------------------*/
2628   if (reuse == MAT_INITIAL_MATRIX){
2629     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2630     s_waits2 = s_waits3 + nsends;
2631     s_waits1 = s_waits2 + nsends;
2632     r_waits1 = s_waits1 + nsends;
2633     r_waits2 = r_waits1 + nrecvs;
2634     r_waits3 = r_waits2 + nrecvs;
2635   } else {
2636     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2637     r_waits3 = s_waits3 + nsends;
2638   }
2639 
2640   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2641   if (reuse == MAT_INITIAL_MATRIX){
2642     /* get new tags to keep the communication clean */
2643     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2644     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2645     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2646 
2647     /* post receives of other's nzlocal */
2648     for (i=0; i<nrecvs; i++){
2649       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2650     }
2651     /* send nzlocal to others */
2652     for (i=0; i<nsends; i++){
2653       sbuf_nz[i] = nzlocal;
2654       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2655     }
2656     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2657     count = nrecvs;
2658     while (count) {
2659       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2660       recv_rank[imdex] = recv_status.MPI_SOURCE;
2661       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2662       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2663 
2664       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2665       rbuf_nz[imdex] += i + 2;
2666       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2667       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2668       count--;
2669     }
2670     /* wait on sends of nzlocal */
2671     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2672     /* send mat->i,j to others, and recv from other's */
2673     /*------------------------------------------------*/
2674     for (i=0; i<nsends; i++){
2675       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2676       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2677     }
2678     /* wait on receives of mat->i,j */
2679     /*------------------------------*/
2680     count = nrecvs;
2681     while (count) {
2682       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2683       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684       count--;
2685     }
2686     /* wait on sends of mat->i,j */
2687     /*---------------------------*/
2688     if (nsends) {
2689       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2690     }
2691   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2692 
2693   /* post receives, send and receive mat->a */
2694   /*----------------------------------------*/
2695   for (imdex=0; imdex<nrecvs; imdex++) {
2696     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2697   }
2698   for (i=0; i<nsends; i++){
2699     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2700   }
2701   count = nrecvs;
2702   while (count) {
2703     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2704     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2705     count--;
2706   }
2707   if (nsends) {
2708     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2709   }
2710 
2711   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2712 
2713   /* create redundant matrix */
2714   /*-------------------------*/
2715   if (reuse == MAT_INITIAL_MATRIX){
2716     /* compute rownz_max for preallocation */
2717     for (imdex=0; imdex<nrecvs; imdex++){
2718       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2719       rptr = rbuf_j[imdex];
2720       for (i=0; i<j; i++){
2721         ncols = rptr[i+1] - rptr[i];
2722         if (rownz_max < ncols) rownz_max = ncols;
2723       }
2724     }
2725 
2726     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2727     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2728     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2729     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2730     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2731     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2732   } else {
2733     C = *matredundant;
2734   }
2735 
2736   /* insert local matrix entries */
2737   rptr = sbuf_j;
2738   cols = sbuf_j + rend-rstart + 1;
2739   vals = sbuf_a;
2740   for (i=0; i<rend-rstart; i++){
2741     row   = i + rstart;
2742     ncols = rptr[i+1] - rptr[i];
2743     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2744     vals += ncols;
2745     cols += ncols;
2746   }
2747   /* insert received matrix entries */
2748   for (imdex=0; imdex<nrecvs; imdex++){
2749     rstart = rowrange[recv_rank[imdex]];
2750     rend   = rowrange[recv_rank[imdex]+1];
2751     rptr = rbuf_j[imdex];
2752     cols = rbuf_j[imdex] + rend-rstart + 1;
2753     vals = rbuf_a[imdex];
2754     for (i=0; i<rend-rstart; i++){
2755       row   = i + rstart;
2756       ncols = rptr[i+1] - rptr[i];
2757       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2758       vals += ncols;
2759       cols += ncols;
2760     }
2761   }
2762   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2763   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2764   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2765   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2766   if (reuse == MAT_INITIAL_MATRIX) {
2767     PetscContainer container;
2768     *matredundant = C;
2769     /* create a supporting struct and attach it to C for reuse */
2770     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2771     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2772     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2773     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2774     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2775     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2776 
2777     redund->nzlocal = nzlocal;
2778     redund->nsends  = nsends;
2779     redund->nrecvs  = nrecvs;
2780     redund->send_rank = send_rank;
2781     redund->recv_rank = recv_rank;
2782     redund->sbuf_nz = sbuf_nz;
2783     redund->rbuf_nz = rbuf_nz;
2784     redund->sbuf_j  = sbuf_j;
2785     redund->sbuf_a  = sbuf_a;
2786     redund->rbuf_j  = rbuf_j;
2787     redund->rbuf_a  = rbuf_a;
2788 
2789     redund->Destroy = C->ops->destroy;
2790     C->ops->destroy = MatDestroy_MatRedundant;
2791   }
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2797 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2798 {
2799   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2800   PetscErrorCode ierr;
2801   PetscInt       i,*idxb = 0;
2802   PetscScalar    *va,*vb;
2803   Vec            vtmp;
2804 
2805   PetscFunctionBegin;
2806   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2807   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2808   if (idx) {
2809     for (i=0; i<A->rmap->n; i++) {
2810       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2811     }
2812   }
2813 
2814   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2815   if (idx) {
2816     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2817   }
2818   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2819   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2820 
2821   for (i=0; i<A->rmap->n; i++){
2822     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2823       va[i] = vb[i];
2824       if (idx) idx[i] = a->garray[idxb[i]];
2825     }
2826   }
2827 
2828   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2829   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2830   ierr = PetscFree(idxb);CHKERRQ(ierr);
2831   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2837 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2838 {
2839   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2840   PetscErrorCode ierr;
2841   PetscInt       i,*idxb = 0;
2842   PetscScalar    *va,*vb;
2843   Vec            vtmp;
2844 
2845   PetscFunctionBegin;
2846   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2847   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2848   if (idx) {
2849     for (i=0; i<A->rmap->n; i++) {
2850       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2851     }
2852   }
2853 
2854   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2855   if (idx) {
2856     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2857   }
2858   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2859   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2860 
2861   for (i=0; i<A->rmap->n; i++){
2862     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2863       va[i] = vb[i];
2864       if (idx) idx[i] = a->garray[idxb[i]];
2865     }
2866   }
2867 
2868   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2869   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2870   ierr = PetscFree(idxb);CHKERRQ(ierr);
2871   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2872   PetscFunctionReturn(0);
2873 }
2874 
2875 #undef __FUNCT__
2876 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2877 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2878 {
2879   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2880   PetscInt       n      = A->rmap->n;
2881   PetscInt       cstart = A->cmap->rstart;
2882   PetscInt      *cmap   = mat->garray;
2883   PetscInt      *diagIdx, *offdiagIdx;
2884   Vec            diagV, offdiagV;
2885   PetscScalar   *a, *diagA, *offdiagA;
2886   PetscInt       r;
2887   PetscErrorCode ierr;
2888 
2889   PetscFunctionBegin;
2890   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2891   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2892   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2893   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2894   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2895   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2896   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2897   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2898   for(r = 0; r < n; ++r) {
2899     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2900       a[r]   = diagA[r];
2901       idx[r] = cstart + diagIdx[r];
2902     } else {
2903       a[r]   = offdiagA[r];
2904       idx[r] = cmap[offdiagIdx[r]];
2905     }
2906   }
2907   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2908   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2909   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2910   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2911   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2912   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2918 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2919 {
2920   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2921   PetscInt       n      = A->rmap->n;
2922   PetscInt       cstart = A->cmap->rstart;
2923   PetscInt      *cmap   = mat->garray;
2924   PetscInt      *diagIdx, *offdiagIdx;
2925   Vec            diagV, offdiagV;
2926   PetscScalar   *a, *diagA, *offdiagA;
2927   PetscInt       r;
2928   PetscErrorCode ierr;
2929 
2930   PetscFunctionBegin;
2931   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2932   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2933   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2934   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2935   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2936   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2937   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2938   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2939   for(r = 0; r < n; ++r) {
2940     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2941       a[r]   = diagA[r];
2942       idx[r] = cstart + diagIdx[r];
2943     } else {
2944       a[r]   = offdiagA[r];
2945       idx[r] = cmap[offdiagIdx[r]];
2946     }
2947   }
2948   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2949   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2950   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2951   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2952   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2953   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 #undef __FUNCT__
2958 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2959 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2960 {
2961   PetscErrorCode ierr;
2962   Mat            *dummy;
2963 
2964   PetscFunctionBegin;
2965   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2966   *newmat = *dummy;
2967   ierr = PetscFree(dummy);CHKERRQ(ierr);
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2975 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2976 {
2977   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2978   PetscErrorCode ierr;
2979 
2980   PetscFunctionBegin;
2981   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 
2986 /* -------------------------------------------------------------------*/
2987 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2988        MatGetRow_MPIAIJ,
2989        MatRestoreRow_MPIAIJ,
2990        MatMult_MPIAIJ,
2991 /* 4*/ MatMultAdd_MPIAIJ,
2992        MatMultTranspose_MPIAIJ,
2993        MatMultTransposeAdd_MPIAIJ,
2994 #ifdef PETSC_HAVE_PBGL
2995        MatSolve_MPIAIJ,
2996 #else
2997        0,
2998 #endif
2999        0,
3000        0,
3001 /*10*/ 0,
3002        0,
3003        0,
3004        MatSOR_MPIAIJ,
3005        MatTranspose_MPIAIJ,
3006 /*15*/ MatGetInfo_MPIAIJ,
3007        MatEqual_MPIAIJ,
3008        MatGetDiagonal_MPIAIJ,
3009        MatDiagonalScale_MPIAIJ,
3010        MatNorm_MPIAIJ,
3011 /*20*/ MatAssemblyBegin_MPIAIJ,
3012        MatAssemblyEnd_MPIAIJ,
3013        MatSetOption_MPIAIJ,
3014        MatZeroEntries_MPIAIJ,
3015 /*24*/ MatZeroRows_MPIAIJ,
3016        0,
3017 #ifdef PETSC_HAVE_PBGL
3018        0,
3019 #else
3020        0,
3021 #endif
3022        0,
3023        0,
3024 /*29*/ MatSetUp_MPIAIJ,
3025 #ifdef PETSC_HAVE_PBGL
3026        0,
3027 #else
3028        0,
3029 #endif
3030        0,
3031        0,
3032        0,
3033 /*34*/ MatDuplicate_MPIAIJ,
3034        0,
3035        0,
3036        0,
3037        0,
3038 /*39*/ MatAXPY_MPIAIJ,
3039        MatGetSubMatrices_MPIAIJ,
3040        MatIncreaseOverlap_MPIAIJ,
3041        MatGetValues_MPIAIJ,
3042        MatCopy_MPIAIJ,
3043 /*44*/ MatGetRowMax_MPIAIJ,
3044        MatScale_MPIAIJ,
3045        0,
3046        0,
3047        MatZeroRowsColumns_MPIAIJ,
3048 /*49*/ 0,
3049        0,
3050        0,
3051        0,
3052        0,
3053 /*54*/ MatFDColoringCreate_MPIAIJ,
3054        0,
3055        MatSetUnfactored_MPIAIJ,
3056        0, /* MatPermute_MPIAIJ, impl currently broken */
3057        0,
3058 /*59*/ MatGetSubMatrix_MPIAIJ,
3059        MatDestroy_MPIAIJ,
3060        MatView_MPIAIJ,
3061        0,
3062        0,
3063 /*64*/ 0,
3064        0,
3065        0,
3066        0,
3067        0,
3068 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3069        MatGetRowMinAbs_MPIAIJ,
3070        0,
3071        MatSetColoring_MPIAIJ,
3072 #if defined(PETSC_HAVE_ADIC)
3073        MatSetValuesAdic_MPIAIJ,
3074 #else
3075        0,
3076 #endif
3077        MatSetValuesAdifor_MPIAIJ,
3078 /*75*/ MatFDColoringApply_AIJ,
3079        0,
3080        0,
3081        0,
3082        0,
3083 /*80*/ 0,
3084        0,
3085        0,
3086 /*83*/ MatLoad_MPIAIJ,
3087        0,
3088        0,
3089        0,
3090        0,
3091        0,
3092 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3093        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3094        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3095        MatPtAP_Basic,
3096        MatPtAPSymbolic_MPIAIJ,
3097 /*94*/ MatPtAPNumeric_MPIAIJ,
3098        0,
3099        0,
3100        0,
3101        0,
3102 /*99*/ 0,
3103        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3104        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3105        MatConjugate_MPIAIJ,
3106        0,
3107 /*104*/MatSetValuesRow_MPIAIJ,
3108        MatRealPart_MPIAIJ,
3109        MatImaginaryPart_MPIAIJ,
3110        0,
3111        0,
3112 /*109*/0,
3113        MatGetRedundantMatrix_MPIAIJ,
3114        MatGetRowMin_MPIAIJ,
3115        0,
3116        0,
3117 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3118        0,
3119        0,
3120        0,
3121        0,
3122 /*119*/0,
3123        0,
3124        0,
3125        0,
3126        MatGetMultiProcBlock_MPIAIJ,
3127 /*124*/MatFindNonZeroRows_MPIAIJ,
3128        MatGetColumnNorms_MPIAIJ,
3129        MatInvertBlockDiagonal_MPIAIJ,
3130        0,
3131        MatGetSubMatricesParallel_MPIAIJ,
3132 /*129*/0,
3133        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3134        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3135        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3136        0,
3137 /*134*/0,
3138        0,
3139        0,
3140        0,
3141        0
3142 };
3143 
3144 /* ----------------------------------------------------------------------------------------*/
3145 
3146 EXTERN_C_BEGIN
3147 #undef __FUNCT__
3148 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3149 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3150 {
3151   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3152   PetscErrorCode ierr;
3153 
3154   PetscFunctionBegin;
3155   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3156   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3157   PetscFunctionReturn(0);
3158 }
3159 EXTERN_C_END
3160 
3161 EXTERN_C_BEGIN
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3164 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3165 {
3166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3167   PetscErrorCode ierr;
3168 
3169   PetscFunctionBegin;
3170   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3171   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 EXTERN_C_END
3175 
3176 EXTERN_C_BEGIN
3177 #undef __FUNCT__
3178 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3179 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3180 {
3181   Mat_MPIAIJ     *b;
3182   PetscErrorCode ierr;
3183   PetscInt       i;
3184   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3185 
3186   PetscFunctionBegin;
3187   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3188   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3189   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3190   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3191   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3192   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3193 
3194   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3195   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3196   if (d_nnz) {
3197     for (i=0; i<B->rmap->n; i++) {
3198       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3199     }
3200   }
3201   if (o_nnz) {
3202     for (i=0; i<B->rmap->n; i++) {
3203       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3204     }
3205   }
3206   b = (Mat_MPIAIJ*)B->data;
3207 
3208   if (!B->preallocated) {
3209     /* Explicitly create 2 MATSEQAIJ matrices. */
3210     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3211     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3212     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3213     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3214     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3215     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3216     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3217     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3218     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3219     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3220   }
3221 
3222   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3223   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3224   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3225   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3226   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3227   B->preallocated = PETSC_TRUE;
3228   PetscFunctionReturn(0);
3229 }
3230 EXTERN_C_END
3231 
3232 #undef __FUNCT__
3233 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3234 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3235 {
3236   Mat            mat;
3237   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3238   PetscErrorCode ierr;
3239 
3240   PetscFunctionBegin;
3241   *newmat       = 0;
3242   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3243   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3244   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3245   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3246   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3247   a    = (Mat_MPIAIJ*)mat->data;
3248 
3249   mat->factortype    = matin->factortype;
3250   mat->rmap->bs      = matin->rmap->bs;
3251   mat->cmap->bs      = matin->cmap->bs;
3252   mat->assembled    = PETSC_TRUE;
3253   mat->insertmode   = NOT_SET_VALUES;
3254   mat->preallocated = PETSC_TRUE;
3255 
3256   a->size           = oldmat->size;
3257   a->rank           = oldmat->rank;
3258   a->donotstash     = oldmat->donotstash;
3259   a->roworiented    = oldmat->roworiented;
3260   a->rowindices     = 0;
3261   a->rowvalues      = 0;
3262   a->getrowactive   = PETSC_FALSE;
3263 
3264   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3265   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3266 
3267   if (oldmat->colmap) {
3268 #if defined (PETSC_USE_CTABLE)
3269     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3270 #else
3271     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3272     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3273     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3274 #endif
3275   } else a->colmap = 0;
3276   if (oldmat->garray) {
3277     PetscInt len;
3278     len  = oldmat->B->cmap->n;
3279     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3280     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3281     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3282   } else a->garray = 0;
3283 
3284   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3285   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3286   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3287   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3288   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3289   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3290   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3291   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3292   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3293   *newmat = mat;
3294   PetscFunctionReturn(0);
3295 }
3296 
3297 
3298 
3299 #undef __FUNCT__
3300 #define __FUNCT__ "MatLoad_MPIAIJ"
3301 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3302 {
3303   PetscScalar    *vals,*svals;
3304   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3305   PetscErrorCode ierr;
3306   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3307   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3308   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3309   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3310   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3311   int            fd;
3312   PetscInt       bs = 1;
3313 
3314   PetscFunctionBegin;
3315   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3316   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3317   if (!rank) {
3318     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3319     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3320     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3321   }
3322 
3323   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3324   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3325   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3326 
3327   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3328 
3329   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3330   M = header[1]; N = header[2];
3331   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3332   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3333   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3334 
3335   /* If global sizes are set, check if they are consistent with that given in the file */
3336   if (sizesset) {
3337     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3338   }
3339   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3340   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3341 
3342   /* determine ownership of all (block) rows */
3343   if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3344   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3345   else m = newMat->rmap->n; /* Set by user */
3346 
3347   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3348   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3349 
3350   /* First process needs enough room for process with most rows */
3351   if (!rank) {
3352     mmax       = rowners[1];
3353     for (i=2; i<size; i++) {
3354       mmax = PetscMax(mmax,rowners[i]);
3355     }
3356   } else mmax = m;
3357 
3358   rowners[0] = 0;
3359   for (i=2; i<=size; i++) {
3360     rowners[i] += rowners[i-1];
3361   }
3362   rstart = rowners[rank];
3363   rend   = rowners[rank+1];
3364 
3365   /* distribute row lengths to all processors */
3366   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3367   if (!rank) {
3368     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3369     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3370     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3371     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3372     for (j=0; j<m; j++) {
3373       procsnz[0] += ourlens[j];
3374     }
3375     for (i=1; i<size; i++) {
3376       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3377       /* calculate the number of nonzeros on each processor */
3378       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3379         procsnz[i] += rowlengths[j];
3380       }
3381       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3382     }
3383     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3384   } else {
3385     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3386   }
3387 
3388   if (!rank) {
3389     /* determine max buffer needed and allocate it */
3390     maxnz = 0;
3391     for (i=0; i<size; i++) {
3392       maxnz = PetscMax(maxnz,procsnz[i]);
3393     }
3394     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3395 
3396     /* read in my part of the matrix column indices  */
3397     nz   = procsnz[0];
3398     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3399     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3400 
3401     /* read in everyone else's and ship off */
3402     for (i=1; i<size; i++) {
3403       nz     = procsnz[i];
3404       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3405       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3406     }
3407     ierr = PetscFree(cols);CHKERRQ(ierr);
3408   } else {
3409     /* determine buffer space needed for message */
3410     nz = 0;
3411     for (i=0; i<m; i++) {
3412       nz += ourlens[i];
3413     }
3414     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3415 
3416     /* receive message of column indices*/
3417     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3418   }
3419 
3420   /* determine column ownership if matrix is not square */
3421   if (N != M) {
3422     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3423     else n = newMat->cmap->n;
3424     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3425     cstart = cend - n;
3426   } else {
3427     cstart = rstart;
3428     cend   = rend;
3429     n      = cend - cstart;
3430   }
3431 
3432   /* loop over local rows, determining number of off diagonal entries */
3433   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3434   jj = 0;
3435   for (i=0; i<m; i++) {
3436     for (j=0; j<ourlens[i]; j++) {
3437       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3438       jj++;
3439     }
3440   }
3441 
3442   for (i=0; i<m; i++) {
3443     ourlens[i] -= offlens[i];
3444   }
3445   if (!sizesset) {
3446     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3447   }
3448 
3449   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3450 
3451   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3452 
3453   for (i=0; i<m; i++) {
3454     ourlens[i] += offlens[i];
3455   }
3456 
3457   if (!rank) {
3458     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3459 
3460     /* read in my part of the matrix numerical values  */
3461     nz   = procsnz[0];
3462     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3463 
3464     /* insert into matrix */
3465     jj      = rstart;
3466     smycols = mycols;
3467     svals   = vals;
3468     for (i=0; i<m; i++) {
3469       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3470       smycols += ourlens[i];
3471       svals   += ourlens[i];
3472       jj++;
3473     }
3474 
3475     /* read in other processors and ship out */
3476     for (i=1; i<size; i++) {
3477       nz     = procsnz[i];
3478       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3479       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3480     }
3481     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3482   } else {
3483     /* receive numeric values */
3484     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3485 
3486     /* receive message of values*/
3487     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3488 
3489     /* insert into matrix */
3490     jj      = rstart;
3491     smycols = mycols;
3492     svals   = vals;
3493     for (i=0; i<m; i++) {
3494       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3495       smycols += ourlens[i];
3496       svals   += ourlens[i];
3497       jj++;
3498     }
3499   }
3500   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3501   ierr = PetscFree(vals);CHKERRQ(ierr);
3502   ierr = PetscFree(mycols);CHKERRQ(ierr);
3503   ierr = PetscFree(rowners);CHKERRQ(ierr);
3504 
3505   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3507 
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 #undef __FUNCT__
3512 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3513 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3514 {
3515   PetscErrorCode ierr;
3516   IS             iscol_local;
3517   PetscInt       csize;
3518 
3519   PetscFunctionBegin;
3520   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3521   if (call == MAT_REUSE_MATRIX) {
3522     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3523     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3524   } else {
3525     PetscInt cbs;
3526     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3527     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3528     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3529   }
3530   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3531   if (call == MAT_INITIAL_MATRIX) {
3532     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3533     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3534   }
3535   PetscFunctionReturn(0);
3536 }
3537 
3538 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3541 /*
3542     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3543   on each process, and then the end result built by concatenating the local matrices.
3544   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3545 
3546   Note: This requires a sequential iscol with all indices.
3547 */
3548 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3549 {
3550   PetscErrorCode ierr;
3551   PetscMPIInt    rank,size;
3552   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3553   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3554   PetscBool      allcolumns, colflag;
3555   Mat            M,Mreuse;
3556   MatScalar      *vwork,*aa;
3557   MPI_Comm       comm = ((PetscObject)mat)->comm;
3558   Mat_SeqAIJ     *aij;
3559 
3560 
3561   PetscFunctionBegin;
3562   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3563   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3564 
3565   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3566   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3567   if (colflag && ncol == mat->cmap->N){
3568     allcolumns = PETSC_TRUE;
3569   } else {
3570     allcolumns = PETSC_FALSE;
3571   }
3572   if (call ==  MAT_REUSE_MATRIX) {
3573     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3574     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3575     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3576   } else {
3577     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3578   }
3579 
3580   /*
3581       m - number of local rows
3582       n - number of columns (same on all processors)
3583       rstart - first row in new global matrix generated
3584   */
3585   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3586   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3587   if (call == MAT_INITIAL_MATRIX) {
3588     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3589     ii  = aij->i;
3590     jj  = aij->j;
3591 
3592     /*
3593         Determine the number of non-zeros in the diagonal and off-diagonal
3594         portions of the matrix in order to do correct preallocation
3595     */
3596 
3597     /* first get start and end of "diagonal" columns */
3598     if (csize == PETSC_DECIDE) {
3599       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3600       if (mglobal == n) { /* square matrix */
3601 	nlocal = m;
3602       } else {
3603         nlocal = n/size + ((n % size) > rank);
3604       }
3605     } else {
3606       nlocal = csize;
3607     }
3608     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3609     rstart = rend - nlocal;
3610     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3611 
3612     /* next, compute all the lengths */
3613     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3614     olens = dlens + m;
3615     for (i=0; i<m; i++) {
3616       jend = ii[i+1] - ii[i];
3617       olen = 0;
3618       dlen = 0;
3619       for (j=0; j<jend; j++) {
3620         if (*jj < rstart || *jj >= rend) olen++;
3621         else dlen++;
3622         jj++;
3623       }
3624       olens[i] = olen;
3625       dlens[i] = dlen;
3626     }
3627     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3628     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3629     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3630     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3631     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3632     ierr = PetscFree(dlens);CHKERRQ(ierr);
3633   } else {
3634     PetscInt ml,nl;
3635 
3636     M = *newmat;
3637     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3638     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3639     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3640     /*
3641          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3642        rather than the slower MatSetValues().
3643     */
3644     M->was_assembled = PETSC_TRUE;
3645     M->assembled     = PETSC_FALSE;
3646   }
3647   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3648   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3649   ii  = aij->i;
3650   jj  = aij->j;
3651   aa  = aij->a;
3652   for (i=0; i<m; i++) {
3653     row   = rstart + i;
3654     nz    = ii[i+1] - ii[i];
3655     cwork = jj;     jj += nz;
3656     vwork = aa;     aa += nz;
3657     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3658   }
3659 
3660   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3661   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3662   *newmat = M;
3663 
3664   /* save submatrix used in processor for next request */
3665   if (call ==  MAT_INITIAL_MATRIX) {
3666     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3667     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3668   }
3669 
3670   PetscFunctionReturn(0);
3671 }
3672 
3673 EXTERN_C_BEGIN
3674 #undef __FUNCT__
3675 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3676 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3677 {
3678   PetscInt       m,cstart, cend,j,nnz,i,d;
3679   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3680   const PetscInt *JJ;
3681   PetscScalar    *values;
3682   PetscErrorCode ierr;
3683 
3684   PetscFunctionBegin;
3685   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3686 
3687   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3688   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3689   m      = B->rmap->n;
3690   cstart = B->cmap->rstart;
3691   cend   = B->cmap->rend;
3692   rstart = B->rmap->rstart;
3693 
3694   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3695 
3696 #if defined(PETSC_USE_DEBUG)
3697   for (i=0; i<m; i++) {
3698     nnz     = Ii[i+1]- Ii[i];
3699     JJ      = J + Ii[i];
3700     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3701     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3702     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3703   }
3704 #endif
3705 
3706   for (i=0; i<m; i++) {
3707     nnz     = Ii[i+1]- Ii[i];
3708     JJ      = J + Ii[i];
3709     nnz_max = PetscMax(nnz_max,nnz);
3710     d       = 0;
3711     for (j=0; j<nnz; j++) {
3712       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3713     }
3714     d_nnz[i] = d;
3715     o_nnz[i] = nnz - d;
3716   }
3717   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3718   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3719 
3720   if (v) values = (PetscScalar*)v;
3721   else {
3722     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3723     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3724   }
3725 
3726   for (i=0; i<m; i++) {
3727     ii   = i + rstart;
3728     nnz  = Ii[i+1]- Ii[i];
3729     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3730   }
3731   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3732   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3733 
3734   if (!v) {
3735     ierr = PetscFree(values);CHKERRQ(ierr);
3736   }
3737   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3738   PetscFunctionReturn(0);
3739 }
3740 EXTERN_C_END
3741 
3742 #undef __FUNCT__
3743 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3744 /*@
3745    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3746    (the default parallel PETSc format).
3747 
3748    Collective on MPI_Comm
3749 
3750    Input Parameters:
3751 +  B - the matrix
3752 .  i - the indices into j for the start of each local row (starts with zero)
3753 .  j - the column indices for each local row (starts with zero)
3754 -  v - optional values in the matrix
3755 
3756    Level: developer
3757 
3758    Notes:
3759        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3760      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3761      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3762 
3763        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3764 
3765        The format used for the sparse matrix input is equivalent to a
3766     row-major ordering, i.e. for the following matrix, the input data expected is
3767     as shown:
3768 
3769         1 0 0
3770         2 0 3     P0
3771        -------
3772         4 5 6     P1
3773 
3774      Process0 [P0]: rows_owned=[0,1]
3775         i =  {0,1,3}  [size = nrow+1  = 2+1]
3776         j =  {0,0,2}  [size = local nz = 3]
3777         v =  {1,2,3}  [size = local nz = 3]
3778 
3779      Process1 [P1]: rows_owned=[2]
3780         i =  {0,3}    [size = nrow+1  = 1+1]
3781         j =  {0,1,2}  [size = local nz = 3]
3782         v =  {4,5,6}  [size = local nz = 3]
3783 
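     A minimal call sequence per process is sketched below, assuming the local CSR
     arrays i, j, v and the local sizes m, n have been built as in the example above,
     and comm is the matrix communicator (error checking omitted):
.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
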
3784 .keywords: matrix, aij, compressed row, sparse, parallel
3785 
3786 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3787           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3788 @*/
3789 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3790 {
3791   PetscErrorCode ierr;
3792 
3793   PetscFunctionBegin;
3794   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3795   PetscFunctionReturn(0);
3796 }
3797 
3798 #undef __FUNCT__
3799 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3800 /*@C
3801    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3802    (the default parallel PETSc format).  For good matrix assembly performance
3803    the user should preallocate the matrix storage by setting the parameters
3804    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3805    performance can be increased by more than a factor of 50.
3806 
3807    Collective on MPI_Comm
3808 
3809    Input Parameters:
3810 +  A - the matrix
3811 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3812            (same value is used for all local rows)
3813 .  d_nnz - array containing the number of nonzeros in the various rows of the
3814            DIAGONAL portion of the local submatrix (possibly different for each row)
3815            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3816            The size of this array is equal to the number of local rows, i.e 'm'.
3817            For matrices that will be factored, you must leave room for (and set)
3818            the diagonal entry even if it is zero.
3819 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3820            submatrix (same value is used for all local rows).
3821 -  o_nnz - array containing the number of nonzeros in the various rows of the
3822            OFF-DIAGONAL portion of the local submatrix (possibly different for
3823            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3824            structure. The size of this array is equal to the number
3825            of local rows, i.e 'm'.
3826 
3827    If the *_nnz parameter is given then the *_nz parameter is ignored
3828 
3829    The AIJ format (also called the Yale sparse matrix format or
3830    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3831    storage.  The stored row and column indices begin with zero.
3832    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3833 
3834    The parallel matrix is partitioned such that the first m0 rows belong to
3835    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3836    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3837 
3838    The DIAGONAL portion of the local submatrix of a processor can be defined
3839    as the submatrix which is obtained by extracting the part corresponding to
3840    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3841    first row that belongs to the processor, r2 is the last row belonging to
3842    this processor, and c1-c2 is the range of indices of the local part of a
3843    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3844    common case of a square matrix, the row and column ranges are the same and
3845    the DIAGONAL part is also square. The remaining portion of the local
3846    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3847 
3848    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3849 
3850    You can call MatGetInfo() to get information on how effective the preallocation was,
3851    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3852    You can also run with the option -info and look for messages with the string
3853    malloc in them to see if additional memory allocation was needed.
3854 
3855    Example usage:
3856 
3857    Consider the following 8x8 matrix with 34 non-zero values, that is
3858    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3859    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3860    as follows:
3861 
3862 .vb
3863             1  2  0  |  0  3  0  |  0  4
3864     Proc0   0  5  6  |  7  0  0  |  8  0
3865             9  0 10  | 11  0  0  | 12  0
3866     -------------------------------------
3867            13  0 14  | 15 16 17  |  0  0
3868     Proc1   0 18  0  | 19 20 21  |  0  0
3869             0  0  0  | 22 23  0  | 24  0
3870     -------------------------------------
3871     Proc2  25 26 27  |  0  0 28  | 29  0
3872            30  0  0  | 31 32 33  |  0 34
3873 .ve
3874 
3875    This can be represented as a collection of submatrices as:
3876 
3877 .vb
3878       A B C
3879       D E F
3880       G H I
3881 .ve
3882 
3883    Where the submatrices A,B,C are owned by proc0, D,E,F are
3884    owned by proc1, G,H,I are owned by proc2.
3885 
3886    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3887    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3888    The 'M','N' parameters are 8,8, and have the same values on all procs.
3889 
3890    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3891    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3892    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3893    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3894    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3895    matrix, and [DF] as another SeqAIJ matrix.
3896 
3897    When d_nz, o_nz parameters are specified, d_nz storage elements are
3898    allocated for every row of the local diagonal submatrix, and o_nz
3899    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3900    One way to choose d_nz and o_nz is to use the max nonzeros per local
3901    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3902    In this case, the values of d_nz,o_nz are:
3903 .vb
3904      proc0 : d_nz = 2, o_nz = 2
3905      proc1 : d_nz = 3, o_nz = 2
3906      proc2 : d_nz = 1, o_nz = 4
3907 .ve
3908    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3909    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3910    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3911    34 values.
3912 
3913    When d_nnz, o_nnz parameters are specified, the storage is specified
3914    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3915    In the above case the values for d_nnz,o_nnz are:
3916 .vb
3917      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3918      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3919      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3920 .ve
3921    Here the space allocated is the sum of all the above values, i.e. 34, and
3922    hence the pre-allocation is perfect.
3923 
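   For instance, for proc1 of the example above (3 local rows), the per-row variant of
   the call is sketched below, assuming B is the already created 8x8 MPIAIJ matrix
   (error checking omitted):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
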
3924    Level: intermediate
3925 
3926 .keywords: matrix, aij, compressed row, sparse, parallel
3927 
3928 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3929           MPIAIJ, MatGetInfo()
3930 @*/
3931 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3932 {
3933   PetscErrorCode ierr;
3934 
3935   PetscFunctionBegin;
3936   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3937   PetscValidType(B,1);
3938   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3939   PetscFunctionReturn(0);
3940 }
3941 
3942 #undef __FUNCT__
3943 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3944 /*@
3945      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3946          CSR format the local rows.
3947 
3948    Collective on MPI_Comm
3949 
3950    Input Parameters:
3951 +  comm - MPI communicator
3952 .  m - number of local rows (Cannot be PETSC_DECIDE)
3953 .  n - This value should be the same as the local size used in creating the
3954        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3955        calculated if N is given) For square matrices n is almost always m.
3956 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3957 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3958 .   i - row indices
3959 .   j - column indices
3960 -   a - matrix values
3961 
3962    Output Parameter:
3963 .   mat - the matrix
3964 
3965    Level: intermediate
3966 
3967    Notes:
3968        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3969      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3970      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3971 
3972        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3973 
3974        The format used for the sparse matrix input is equivalent to a
3975     row-major ordering, i.e. for the following matrix, the input data expected is
3976     as shown:
3977 
3978         1 0 0
3979         2 0 3     P0
3980        -------
3981         4 5 6     P1
3982 
3983      Process0 [P0]: rows_owned=[0,1]
3984         i =  {0,1,3}  [size = nrow+1  = 2+1]
3985         j =  {0,0,2}  [size = local nz = 3]
3986         v =  {1,2,3}  [size = local nz = 3]
3987 
3988      Process1 [P1]: rows_owned=[2]
3989         i =  {0,3}    [size = nrow+1  = 1+1]
3990         j =  {0,1,2}  [size = local nz = 3]
3991         v =  {4,5,6}  [size = local nz = 3]
3992 
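     A one-call sketch for the same example, assuming the local arrays i, j, a and the
     local sizes m, n have been set up per process as above and comm is the matrix
     communicator (error checking omitted):
.vb
     Mat A;
     MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
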
3993 .keywords: matrix, aij, compressed row, sparse, parallel
3994 
3995 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3996           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3997 @*/
3998 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3999 {
4000   PetscErrorCode ierr;
4001 
4002   PetscFunctionBegin;
4003   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4004   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4005   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4006   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4007   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4008   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4009   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 #undef __FUNCT__
4014 #define __FUNCT__ "MatCreateAIJ"
4015 /*@C
4016    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4017    (the default parallel PETSc format).  For good matrix assembly performance
4018    the user should preallocate the matrix storage by setting the parameters
4019    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4020    performance can be increased by more than a factor of 50.
4021 
4022    Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4034 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4035            (same value is used for all local rows)
4036 .  d_nnz - array containing the number of nonzeros in the various rows of the
4037            DIAGONAL portion of the local submatrix (possibly different for each row)
4038            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4039            The size of this array is equal to the number of local rows, i.e 'm'.
4040 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4041            submatrix (same value is used for all local rows).
4042 -  o_nnz - array containing the number of nonzeros in the various rows of the
4043            OFF-DIAGONAL portion of the local submatrix (possibly different for
4044            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4045            structure. The size of this array is equal to the number
4046            of local rows, i.e 'm'.
4047 
4048    Output Parameter:
4049 .  A - the matrix
4050 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
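
   For example, a minimal sketch of that recommended sequence (the sizes and
   preallocation arguments are as in the parameter list above; both preallocation
   routines may be called, and the one that does not match the actual type takes
   no effect):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
   ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);
.ve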
4054 
4055    Notes:
4056    If the *_nnz parameter is given then the *_nz parameter is ignored
4057 
4058    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4059    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4060    storage requirements for this matrix.
4061 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4065 
4066    The user MUST specify either the local or global matrix dimensions
4067    (possibly both).
4068 
   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the values of the input parameter 'm' on each process,
   i.e. each processor stores values corresponding to an [m x N] submatrix.
4074 
   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
4078 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n owned by
   that processor, i.e. the diagonal submatrix on process 0 is
   [m0 x n0], on process 1 it is [m1 x n1], etc. The remaining portion
   of the local submatrix, of size [m x (N-n)], constitutes the
   OFF-DIAGONAL portion. The example below illustrates this concept.
4086 
4087    For a square global matrix we define each processor's diagonal portion
4088    to be its local rows and the corresponding columns (a square submatrix);
4089    each processor's off-diagonal portion encompasses the remainder of the
4090    local matrix (a rectangular submatrix).
4091 
4092    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4093 
4094    When calling this routine with a single process communicator, a matrix of
4095    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4096    type of communicator, use the construction mechanism:
4097      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4098 
4099    By default, this format uses inodes (identical nodes) when possible.
4100    We search for consecutive rows with the same nonzero structure, thereby
4101    reusing matrix information to achieve increased efficiency.
4102 
4103    Options Database Keys:
4104 +  -mat_no_inode  - Do not use inodes
4105 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4106 -  -mat_aij_oneindex - Internally use indexing starting at 1
4107         rather than 0.  Note that when calling MatSetValues(),
4108         the user still MUST index entries starting at 0!
4109 
4110 
4111    Example usage:
4112 
   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:
4117 
4118 .vb
4119             1  2  0  |  0  3  0  |  0  4
4120     Proc0   0  5  6  |  7  0  0  |  8  0
4121             9  0 10  | 11  0  0  | 12  0
4122     -------------------------------------
4123            13  0 14  | 15 16 17  |  0  0
4124     Proc1   0 18  0  | 19 20 21  |  0  0
4125             0  0  0  | 22 23  0  | 24  0
4126     -------------------------------------
4127     Proc2  25 26 27  |  0  0 28  | 29  0
4128            30  0  0  | 31 32 33  |  0 34
4129 .ve
4130 
4131    This can be represented as a collection of submatrices as:
4132 
4133 .vb
4134       A B C
4135       D E F
4136       G H I
4137 .ve
4138 
4139    Where the submatrices A,B,C are owned by proc0, D,E,F are
4140    owned by proc1, G,H,I are owned by proc2.
4141 
4142    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4143    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4144    The 'M','N' parameters are 8,8, and have the same values on all procs.
4145 
4146    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4147    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4148    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
4152 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
4159 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4163 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4168 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
4172 .vb
4173      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4174      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4175      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4176 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is exact.
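
   For instance, a hedged sketch of the corresponding call on proc0 in this
   example (comm is assumed to span the three processes):
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
   ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve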
4179 
4180    Level: intermediate
4181 
4182 .keywords: matrix, aij, compressed row, sparse, parallel
4183 
4184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4185           MPIAIJ, MatCreateMPIAIJWithArrays()
4186 @*/
4187 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4188 {
4189   PetscErrorCode ierr;
4190   PetscMPIInt    size;
4191 
4192   PetscFunctionBegin;
4193   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4194   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4196   if (size > 1) {
4197     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4198     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4199   } else {
4200     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4201     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4202   }
4203   PetscFunctionReturn(0);
4204 }
4205 
4206 #undef __FUNCT__
4207 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4208 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4209 {
4210   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4211 
4212   PetscFunctionBegin;
4213   *Ad     = a->A;
4214   *Ao     = a->B;
4215   *colmap = a->garray;
4216   PetscFunctionReturn(0);
4217 }
4218 
4219 #undef __FUNCT__
4220 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4221 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4222 {
4223   PetscErrorCode ierr;
4224   PetscInt       i;
4225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4226 
4227   PetscFunctionBegin;
4228   if (coloring->ctype == IS_COLORING_GLOBAL) {
4229     ISColoringValue *allcolors,*colors;
4230     ISColoring      ocoloring;
4231 
4232     /* set coloring for diagonal portion */
4233     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4234 
4235     /* set coloring for off-diagonal portion */
4236     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4237     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4238     for (i=0; i<a->B->cmap->n; i++) {
4239       colors[i] = allcolors[a->garray[i]];
4240     }
4241     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4242     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4243     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4244     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4245   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4246     ISColoringValue *colors;
4247     PetscInt        *larray;
4248     ISColoring      ocoloring;
4249 
4250     /* set coloring for diagonal portion */
4251     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4252     for (i=0; i<a->A->cmap->n; i++) {
4253       larray[i] = i + A->cmap->rstart;
4254     }
4255     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4256     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4257     for (i=0; i<a->A->cmap->n; i++) {
4258       colors[i] = coloring->colors[larray[i]];
4259     }
4260     ierr = PetscFree(larray);CHKERRQ(ierr);
4261     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4262     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4263     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4264 
4265     /* set coloring for off-diagonal portion */
4266     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4267     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4268     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4269     for (i=0; i<a->B->cmap->n; i++) {
4270       colors[i] = coloring->colors[larray[i]];
4271     }
4272     ierr = PetscFree(larray);CHKERRQ(ierr);
4273     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4274     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4275     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4276   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4277 
4278   PetscFunctionReturn(0);
4279 }
4280 
4281 #if defined(PETSC_HAVE_ADIC)
4282 #undef __FUNCT__
4283 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4284 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4285 {
4286   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4287   PetscErrorCode ierr;
4288 
4289   PetscFunctionBegin;
4290   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4291   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4292   PetscFunctionReturn(0);
4293 }
4294 #endif
4295 
4296 #undef __FUNCT__
4297 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4298 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4299 {
4300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4301   PetscErrorCode ierr;
4302 
4303   PetscFunctionBegin;
4304   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4305   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4306   PetscFunctionReturn(0);
4307 }
4308 
4309 #undef __FUNCT__
4310 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4311 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4312 {
4313   PetscErrorCode ierr;
4314   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4315   PetscInt       *indx;
4316 
4317   PetscFunctionBegin;
4318   /* This routine will ONLY return MPIAIJ type matrix */
4319   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4320   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4321   if (n == PETSC_DECIDE){
4322     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4323   }
4324   /* Check sum(n) = N */
4325   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4326   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4327 
4328   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4329   rstart -= m;
4330 
4331   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4332   for (i=0;i<m;i++) {
4333     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4334     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4335     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4336   }
4337 
4338   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4339   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4340   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4341   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4342   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4343   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4344   PetscFunctionReturn(0);
4345 }
4346 
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4349 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4350 {
4351   PetscErrorCode ierr;
4352   PetscInt       m,N,i,rstart,nnz,Ii;
4353   PetscInt       *indx;
4354   PetscScalar    *values;
4355 
4356   PetscFunctionBegin;
4357   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4358   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4359   for (i=0;i<m;i++) {
4360     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4361     Ii    = i + rstart;
4362     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4363     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4364   }
4365   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4366   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4367   PetscFunctionReturn(0);
4368 }
4369 
4370 #undef __FUNCT__
4371 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4372 /*@
4373       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4374                  matrices from each processor
4375 
4376     Collective on MPI_Comm
4377 
4378    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4380 .    inmat - the input sequential matrices
4381 .    n - number of local columns (or PETSC_DECIDE)
4382 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4383 
4384    Output Parameter:
4385 .    outmat - the parallel matrix generated
4386 
4387     Level: advanced
4388 
   Notes: The number of columns of the input matrix MUST be the same on EACH processor.
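
   A minimal usage sketch (a hedged sketch: each rank is assumed to hold a
   SeqAIJ matrix inmat with the same number of columns):
.vb
   Mat outmat;
   ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve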
4390 
4391 @*/
4392 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4393 {
4394   PetscErrorCode ierr;
4395 
4396   PetscFunctionBegin;
4397   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4398   if (scall == MAT_INITIAL_MATRIX){
4399     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4400   }
4401   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4402   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "MatFileSplit"
4408 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4409 {
4410   PetscErrorCode    ierr;
4411   PetscMPIInt       rank;
4412   PetscInt          m,N,i,rstart,nnz;
4413   size_t            len;
4414   const PetscInt    *indx;
4415   PetscViewer       out;
4416   char              *name;
4417   Mat               B;
4418   const PetscScalar *values;
4419 
4420   PetscFunctionBegin;
4421   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4422   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4423   /* Should this be the type of the diagonal block of A? */
4424   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4425   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4426   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4427   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4428   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4429   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4430   for (i=0;i<m;i++) {
4431     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4432     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4433     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4434   }
4435   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4436   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4437 
4438   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4439   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4440   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4441   sprintf(name,"%s.%d",outfile,rank);
4442   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
4444   ierr = MatView(B,out);CHKERRQ(ierr);
4445   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4446   ierr = MatDestroy(&B);CHKERRQ(ierr);
4447   PetscFunctionReturn(0);
4448 }
4449 
4450 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4451 #undef __FUNCT__
4452 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4453 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4454 {
4455   PetscErrorCode       ierr;
4456   Mat_Merge_SeqsToMPI  *merge;
4457   PetscContainer       container;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4461   if (container) {
4462     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4475     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4476     ierr = PetscFree(merge);CHKERRQ(ierr);
4477     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4478   }
4479   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4480   PetscFunctionReturn(0);
4481 }
4482 
4483 #include <../src/mat/utils/freespace.h>
4484 #include <petscbt.h>
4485 
4486 #undef __FUNCT__
4487 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4488 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4489 {
4490   PetscErrorCode       ierr;
4491   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4492   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4493   PetscMPIInt          size,rank,taga,*len_s;
4494   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4495   PetscInt             proc,m;
4496   PetscInt             **buf_ri,**buf_rj;
4497   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4498   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4499   MPI_Request          *s_waits,*r_waits;
4500   MPI_Status           *status;
4501   MatScalar            *aa=a->a;
4502   MatScalar            **abuf_r,*ba_i;
4503   Mat_Merge_SeqsToMPI  *merge;
4504   PetscContainer       container;
4505 
4506   PetscFunctionBegin;
4507   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4508 
4509   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4510   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4511 
4512   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4513   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4514 
4515   bi     = merge->bi;
4516   bj     = merge->bj;
4517   buf_ri = merge->buf_ri;
4518   buf_rj = merge->buf_rj;
4519 
4520   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4521   owners = merge->rowmap->range;
4522   len_s  = merge->len_s;
4523 
4524   /* send and recv matrix values */
4525   /*-----------------------------*/
4526   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4527   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4528 
4529   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4530   for (proc=0,k=0; proc<size; proc++){
4531     if (!len_s[proc]) continue;
4532     i = owners[proc];
4533     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4534     k++;
4535   }
4536 
4537   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4538   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4539   ierr = PetscFree(status);CHKERRQ(ierr);
4540 
4541   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4542   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4543 
4544   /* insert mat values of mpimat */
4545   /*----------------------------*/
4546   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4547   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4548 
4549   for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4554   }
4555 
4556   /* set values of ba */
4557   m = merge->rowmap->n;
4558   for (i=0; i<m; i++) {
4559     arow = owners[rank] + i;
4560     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4561     bnzi = bi[i+1] - bi[i];
4562     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4563 
4564     /* add local non-zero vals of this proc's seqmat into ba */
4565     anzi = ai[arow+1] - ai[arow];
4566     aj   = a->j + ai[arow];
4567     aa   = a->a + ai[arow];
4568     nextaj = 0;
4569     for (j=0; nextaj<anzi; j++){
4570       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4571         ba_i[j] += aa[nextaj++];
4572       }
4573     }
4574 
4575     /* add received vals into ba */
4576     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4577       /* i-th row */
4578       if (i == *nextrow[k]) {
4579         anzi = *(nextai[k]+1) - *nextai[k];
4580         aj   = buf_rj[k] + *(nextai[k]);
4581         aa   = abuf_r[k] + *(nextai[k]);
4582         nextaj = 0;
4583         for (j=0; nextaj<anzi; j++){
4584           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4585             ba_i[j] += aa[nextaj++];
4586           }
4587         }
4588         nextrow[k]++; nextai[k]++;
4589       }
4590     }
4591     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4592   }
4593   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4594   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595 
4596   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4597   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4598   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4599   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4600   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4601   PetscFunctionReturn(0);
4602 }
4603 
4604 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4605 
4606 #undef __FUNCT__
4607 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4608 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4609 {
4610   PetscErrorCode       ierr;
4611   Mat                  B_mpi;
4612   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4613   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4614   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4615   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4616   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4617   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4618   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4619   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4620   MPI_Status           *status;
4621   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4622   PetscBT              lnkbt;
4623   Mat_Merge_SeqsToMPI  *merge;
4624   PetscContainer       container;
4625 
4626   PetscFunctionBegin;
4627   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4628 
4629   /* make sure it is a PETSc comm */
4630   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4631   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4632   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4633 
4634   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4635   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4636 
4637   /* determine row ownership */
4638   /*---------------------------------------------------------*/
4639   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4640   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4641   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4642   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4643   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4644   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4645   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4646 
4647   m      = merge->rowmap->n;
4648   M      = merge->rowmap->N;
4649   owners = merge->rowmap->range;
4650 
4651   /* determine the number of messages to send, their lengths */
4652   /*---------------------------------------------------------*/
4653   len_s  = merge->len_s;
4654 
4655   len = 0;  /* length of buf_si[] */
4656   merge->nsend = 0;
4657   for (proc=0; proc<size; proc++){
4658     len_si[proc] = 0;
4659     if (proc == rank){
4660       len_s[proc] = 0;
4661     } else {
4662       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4663       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4664     }
4665     if (len_s[proc]) {
4666       merge->nsend++;
4667       nrows = 0;
4668       for (i=owners[proc]; i<owners[proc+1]; i++){
4669         if (ai[i+1] > ai[i]) nrows++;
4670       }
4671       len_si[proc] = 2*(nrows+1);
4672       len += len_si[proc];
4673     }
4674   }
4675 
4676   /* determine the number and length of messages to receive for ij-structure */
4677   /*-------------------------------------------------------------------------*/
4678   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4679   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4680 
4681   /* post the Irecv of j-structure */
4682   /*-------------------------------*/
4683   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4684   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4685 
4686   /* post the Isend of j-structure */
4687   /*--------------------------------*/
4688   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4689 
4690   for (proc=0, k=0; proc<size; proc++){
4691     if (!len_s[proc]) continue;
4692     i = owners[proc];
4693     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4694     k++;
4695   }
4696 
4697   /* receives and sends of j-structure are complete */
4698   /*------------------------------------------------*/
4699   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4700   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4701 
4702   /* send and recv i-structure */
4703   /*---------------------------*/
4704   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4705   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4706 
4707   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4708   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4709   for (proc=0,k=0; proc<size; proc++){
4710     if (!len_s[proc]) continue;
4711     /* form outgoing message for i-structure:
4712          buf_si[0]:                 nrows to be sent
4713                [1:nrows]:           row index (global)
4714                [nrows+1:2*nrows+1]: i-structure index
4715     */
4716     /*-------------------------------------------*/
4717     nrows = len_si[proc]/2 - 1;
4718     buf_si_i    = buf_si + nrows+1;
4719     buf_si[0]   = nrows;
4720     buf_si_i[0] = 0;
4721     nrows = 0;
4722     for (i=owners[proc]; i<owners[proc+1]; i++){
4723       anzi = ai[i+1] - ai[i];
4724       if (anzi) {
4725         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4726         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4727         nrows++;
4728       }
4729     }
4730     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4731     k++;
4732     buf_si += len_si[proc];
4733   }
4734 
4735   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4736   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4737 
4738   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4739   for (i=0; i<merge->nrecv; i++){
4740     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4741   }
4742 
4743   ierr = PetscFree(len_si);CHKERRQ(ierr);
4744   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4745   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4746   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4747   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4748   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4749   ierr = PetscFree(status);CHKERRQ(ierr);
4750 
4751   /* compute a local seq matrix in each processor */
4752   /*----------------------------------------------*/
4753   /* allocate bi array and free space for accumulating nonzero column info */
4754   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4755   bi[0] = 0;
4756 
4757   /* create and initialize a linked list */
4758   nlnk = N+1;
4759   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4760 
4761   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
4764   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4765   current_space = free_space;
4766 
4767   /* determine symbolic info for each local row */
4768   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4769 
4770   for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4775   }
4776 
4777   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4778   len = 0;
4779   for (i=0;i<m;i++) {
4780     bnzi   = 0;
4781     /* add local non-zero cols of this proc's seqmat into lnk */
4782     arow   = owners[rank] + i;
4783     anzi   = ai[arow+1] - ai[arow];
4784     aj     = a->j + ai[arow];
4785     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4786     bnzi += nlnk;
4787     /* add received col data into lnk */
4788     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4789       if (i == *nextrow[k]) { /* i-th row */
4790         anzi = *(nextai[k]+1) - *nextai[k];
4791         aj   = buf_rj[k] + *nextai[k];
4792         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4793         bnzi += nlnk;
4794         nextrow[k]++; nextai[k]++;
4795       }
4796     }
4797     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4798 
4799     /* if free space is not available, make more free space */
4800     if (current_space->local_remaining<bnzi) {
4801       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4802       nspacedouble++;
4803     }
4804     /* copy data into free space, then initialize lnk */
4805     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4806     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4807 
4808     current_space->array           += bnzi;
4809     current_space->local_used      += bnzi;
4810     current_space->local_remaining -= bnzi;
4811 
4812     bi[i+1] = bi[i] + bnzi;
4813   }
4814 
4815   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4816 
4817   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4818   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4819   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4820 
4821   /* create symbolic parallel matrix B_mpi */
4822   /*---------------------------------------*/
4823     ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4824   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4825   if (n==PETSC_DECIDE) {
4826     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4827   } else {
4828     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4829   }
4830   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4831   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4832   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4833   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4834   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4835 
4836   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4837   B_mpi->assembled     = PETSC_FALSE;
4838   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4839   merge->bi            = bi;
4840   merge->bj            = bj;
4841   merge->buf_ri        = buf_ri;
4842   merge->buf_rj        = buf_rj;
4843   merge->coi           = PETSC_NULL;
4844   merge->coj           = PETSC_NULL;
4845   merge->owners_co     = PETSC_NULL;
4846 
4847   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4848 
4849   /* attach the supporting struct to B_mpi for reuse */
4850   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4851   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4852   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4853   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4854   *mpimat = B_mpi;
4855 
4856   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4857   PetscFunctionReturn(0);
4858 }
4859 
4860 #undef __FUNCT__
4861 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4862 /*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
                 matrices from each processor
4865 
4866     Collective on MPI_Comm
4867 
4868    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4870 .    seqmat - the input sequential matrices
4871 .    m - number of local rows (or PETSC_DECIDE)
4872 .    n - number of local columns (or PETSC_DECIDE)
4873 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4874 
4875    Output Parameter:
4876 .    mpimat - the parallel matrix generated
4877 
4878     Level: advanced
4879 
4880    Notes:
4881      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4883      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
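
   A minimal usage sketch (a hedged sketch: each rank is assumed to hold a
   SeqAIJ matrix seqmat of identical global dimensions):
.vb
   Mat mpimat;
   ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve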
4884 @*/
4885 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4886 {
4887   PetscErrorCode   ierr;
4888   PetscMPIInt     size;
4889 
4890   PetscFunctionBegin;
4891   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4892   if (size == 1){
4893      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4894      if (scall == MAT_INITIAL_MATRIX){
4895        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4896      } else {
4897        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4898      }
4899      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4900      PetscFunctionReturn(0);
4901   }
4902   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4903   if (scall == MAT_INITIAL_MATRIX){
4904     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4905   }
4906   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4907   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4908   PetscFunctionReturn(0);
4909 }
4910 
4911 #undef __FUNCT__
4912 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4913 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4917 
4918     Not Collective
4919 
4920    Input Parameters:
4921 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4923 
4924    Output Parameter:
4925 .    A_loc - the local sequential matrix generated
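
    A minimal usage sketch (a hedged sketch: A is assumed to be an assembled
    MPIAIJ matrix):
.vb
    Mat A_loc;
    ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
    /* ... use A_loc ... */
    ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve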
4926 
4927     Level: developer
4928 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4930 
4931 @*/
4932 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4933 {
4934   PetscErrorCode  ierr;
4935   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4936   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4937   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4938   MatScalar       *aa=a->a,*ba=b->a,*cam;
4939   PetscScalar     *ca;
4940   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4941   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4942   PetscBool       match;
4943 
4944   PetscFunctionBegin;
4945   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4946   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4947   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4948   if (scall == MAT_INITIAL_MATRIX){
4949     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4950     ci[0] = 0;
4951     for (i=0; i<am; i++){
4952       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4953     }
4954     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4955     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4956     k = 0;
4957     for (i=0; i<am; i++) {
4958       ncols_o = bi[i+1] - bi[i];
4959       ncols_d = ai[i+1] - ai[i];
4960       /* off-diagonal portion of A */
4961       for (jo=0; jo<ncols_o; jo++) {
4962         col = cmap[*bj];
4963         if (col >= cstart) break;
4964         cj[k]   = col; bj++;
4965         ca[k++] = *ba++;
4966       }
4967       /* diagonal portion of A */
4968       for (j=0; j<ncols_d; j++) {
4969         cj[k]   = cstart + *aj++;
4970         ca[k++] = *aa++;
4971       }
4972       /* off-diagonal portion of A */
4973       for (j=jo; j<ncols_o; j++) {
4974         cj[k]   = cmap[*bj++];
4975         ca[k++] = *ba++;
4976       }
4977     }
4978     /* put together the new matrix */
4979     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4980     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4981     /* Since these are PETSc arrays, change flags to free them as necessary. */
4982     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4983     mat->free_a  = PETSC_TRUE;
4984     mat->free_ij = PETSC_TRUE;
4985     mat->nonew   = 0;
4986   } else if (scall == MAT_REUSE_MATRIX){
4987     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4988     ci = mat->i; cj = mat->j; cam = mat->a;
4989     for (i=0; i<am; i++) {
4990       /* off-diagonal portion of A */
4991       ncols_o = bi[i+1] - bi[i];
4992       for (jo=0; jo<ncols_o; jo++) {
4993         col = cmap[*bj];
4994         if (col >= cstart) break;
4995         *cam++ = *ba++; bj++;
4996       }
4997       /* diagonal portion of A */
4998       ncols_d = ai[i+1] - ai[i];
4999       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5000       /* off-diagonal portion of A */
5001       for (j=jo; j<ncols_o; j++) {
5002         *cam++ = *ba++; bj++;
5003       }
5004     }
5005   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5006   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5007   PetscFunctionReturn(0);
5008 }
5009 
5010 #undef __FUNCT__
5011 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5012 /*@C
5013      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5014 
5015     Not Collective
5016 
5017    Input Parameters:
5018 +    A - the matrix
5019 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5020 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5021 
5022    Output Parameter:
5023 .    A_loc - the local sequential matrix generated
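
    A minimal usage sketch (a hedged sketch: A is assumed to be an assembled
    MPIAIJ matrix; passing PETSC_NULL for row and col takes all local rows and
    all nonzero columns):
.vb
    Mat A_loc;
    ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve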
5024 
5025     Level: developer
5026 
5027 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5028 
5029 @*/
5030 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5031 {
5032   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5033   PetscErrorCode    ierr;
5034   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5035   IS                isrowa,iscola;
5036   Mat               *aloc;
5037   PetscBool       match;
5038 
5039   PetscFunctionBegin;
5040   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5041   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5042   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5043   if (!row){
5044     start = A->rmap->rstart; end = A->rmap->rend;
5045     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5046   } else {
5047     isrowa = *row;
5048   }
5049   if (!col){
5050     start = A->cmap->rstart;
5051     cmap  = a->garray;
5052     nzA   = a->A->cmap->n;
5053     nzB   = a->B->cmap->n;
5054     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5055     ncols = 0;
5056     for (i=0; i<nzB; i++) {
5057       if (cmap[i] < start) idx[ncols++] = cmap[i];
5058       else break;
5059     }
5060     imark = i;
5061     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5062     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5063     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5064   } else {
5065     iscola = *col;
5066   }
5067   if (scall != MAT_INITIAL_MATRIX){
5068     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5069     aloc[0] = *A_loc;
5070   }
5071   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5072   *A_loc = aloc[0];
5073   ierr = PetscFree(aloc);CHKERRQ(ierr);
5074   if (!row){
5075     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5076   }
5077   if (!col){
5078     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5079   }
5080   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5081   PetscFunctionReturn(0);
5082 }
5083 
5084 #undef __FUNCT__
5085 #define __FUNCT__ "MatGetBrowsOfAcols"
5086 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5088 
5089     Collective on Mat
5090 
5091    Input Parameters:
5092 +    A,B - the matrices in mpiaij format
5093 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5094 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5095 
5096    Output Parameter:
5097 +    rowb, colb - index sets of rows and columns of B to extract
5098 -    B_seq - the sequential matrix generated
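
    A minimal usage sketch (a hedged sketch: A and B are assumed to be MPIAIJ
    matrices with compatible local layouts; rowb, colb, and B_seq can be passed
    back with MAT_REUSE_MATRIX on subsequent calls):
.vb
    IS  rowb,colb;
    Mat B_seq;
    ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve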
5099 
5100     Level: developer
5101 
5102 @*/
5103 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5104 {
5105   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5106   PetscErrorCode    ierr;
5107   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5108   IS                isrowb,iscolb;
5109   Mat               *bseq=PETSC_NULL;
5110 
5111   PetscFunctionBegin;
5112   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5113     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5114   }
5115   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5116 
5117   if (scall == MAT_INITIAL_MATRIX){
5118     start = A->cmap->rstart;
5119     cmap  = a->garray;
5120     nzA   = a->A->cmap->n;
5121     nzB   = a->B->cmap->n;
5122     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5123     ncols = 0;
5124     for (i=0; i<nzB; i++) {  /* row < local row index */
5125       if (cmap[i] < start) idx[ncols++] = cmap[i];
5126       else break;
5127     }
5128     imark = i;
5129     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5130     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5131     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5132     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5133   } else {
5134     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5135     isrowb = *rowb; iscolb = *colb;
5136     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5137     bseq[0] = *B_seq;
5138   }
5139   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5140   *B_seq = bseq[0];
5141   ierr = PetscFree(bseq);CHKERRQ(ierr);
5142   if (!rowb){
5143     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5144   } else {
5145     *rowb = isrowb;
5146   }
5147   if (!colb){
5148     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5149   } else {
5150     *colb = iscolb;
5151   }
5152   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5153   PetscFunctionReturn(0);
5154 }
5155 
5156 #undef __FUNCT__
5157 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5158 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A
5161 
5162     Collective on Mat
5163 
5164    Input Parameters:
5165 +    A,B - the matrices in mpiaij format
5166 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5167 
5168    Output Parameter:
5169 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5170 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5171 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5172 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5173 
5174     Level: developer
5175 
5176 */
5177 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5178 {
5179   VecScatter_MPI_General *gen_to,*gen_from;
5180   PetscErrorCode         ierr;
5181   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5182   Mat_SeqAIJ             *b_oth;
5183   VecScatter             ctx=a->Mvctx;
5184   MPI_Comm               comm=((PetscObject)ctx)->comm;
5185   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5186   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5187   PetscScalar            *rvalues,*svalues;
5188   MatScalar              *b_otha,*bufa,*bufA;
5189   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5190   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5191   MPI_Status             *sstatus,rstatus;
5192   PetscMPIInt            jj;
5193   PetscInt               *cols,sbs,rbs;
5194   PetscScalar            *vals;
5195 
5196   PetscFunctionBegin;
5197   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5198     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5199   }
5200   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5201   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5202 
5203   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5204   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5205   rvalues  = gen_from->values; /* holds the length of receiving row */
5206   svalues  = gen_to->values;   /* holds the length of sending row */
5207   nrecvs   = gen_from->n;
5208   nsends   = gen_to->n;
5209 
5210   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5211   srow     = gen_to->indices;   /* local row index to be sent */
5212   sstarts  = gen_to->starts;
5213   sprocs   = gen_to->procs;
5214   sstatus  = gen_to->sstatus;
5215   sbs      = gen_to->bs;
5216   rstarts  = gen_from->starts;
5217   rprocs   = gen_from->procs;
5218   rbs      = gen_from->bs;
5219 
5220   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5221   if (scall == MAT_INITIAL_MATRIX){
5222     /* i-array */
5223     /*---------*/
5224     /*  post receives */
5225     for (i=0; i<nrecvs; i++){
5226       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5227       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5228       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5229     }
5230 
5231     /* pack the outgoing message */
5232     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5233     sstartsj[0] = 0;  rstartsj[0] = 0;
5234     len = 0; /* total length of j or a array to be sent */
5235     k = 0;
5236     for (i=0; i<nsends; i++){
5237       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5238       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5239       for (j=0; j<nrows; j++) {
5240         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5241         for (l=0; l<sbs; l++){
5242           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5243           rowlen[j*sbs+l] = ncols;
5244           len += ncols;
5245           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5246         }
5247         k++;
5248       }
5249       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5250       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5251     }
5252     /* recvs and sends of i-array are completed */
5253     i = nrecvs;
5254     while (i--) {
5255       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5256     }
5257     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5258 
5259     /* allocate buffers for sending j and a arrays */
5260     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5261     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5262 
5263     /* create i-array of B_oth */
5264     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5265     b_othi[0] = 0;
5266     len = 0; /* total length of j or a array to be received */
5267     k = 0;
5268     for (i=0; i<nrecvs; i++){
5269       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5271       for (j=0; j<nrows; j++) {
5272         b_othi[k+1] = b_othi[k] + rowlen[j];
5273         len += rowlen[j]; k++;
5274       }
5275       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5276     }
5277 
5278     /* allocate space for j and a arrays of B_oth */
5279     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5280     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5281 
5282     /* j-array */
5283     /*---------*/
5284     /*  post receives of j-array */
5285     for (i=0; i<nrecvs; i++){
5286       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5287       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5288     }
5289 
5290     /* pack the outgoing message j-array */
5291     k = 0;
5292     for (i=0; i<nsends; i++){
5293       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5294       bufJ = bufj+sstartsj[i];
5295       for (j=0; j<nrows; j++) {
5296         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5297         for (ll=0; ll<sbs; ll++){
5298           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5299           for (l=0; l<ncols; l++){
5300             *bufJ++ = cols[l];
5301           }
5302           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5303         }
5304       }
5305       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5306     }
5307 
5308     /* wait until recvs and sends of j-array are completed */
5309     i = nrecvs;
5310     while (i--) {
5311       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5312     }
5313     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5314   } else if (scall == MAT_REUSE_MATRIX){
5315     sstartsj = *startsj_s;
5316     rstartsj = *startsj_r;
5317     bufa     = *bufa_ptr;
5318     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5319     b_otha   = b_oth->a;
5320   } else {
5321     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5322   }
5323 
5324   /* a-array */
5325   /*---------*/
5326   /*  post receives of a-array */
5327   for (i=0; i<nrecvs; i++){
5328     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5329     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5330   }
5331 
5332   /* pack the outgoing message a-array */
5333   k = 0;
5334   for (i=0; i<nsends; i++){
5335     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5336     bufA = bufa+sstartsj[i];
5337     for (j=0; j<nrows; j++) {
5338       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5339       for (ll=0; ll<sbs; ll++){
5340         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5341         for (l=0; l<ncols; l++){
5342           *bufA++ = vals[l];
5343         }
5344         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5345       }
5346     }
5347     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5348   }
5349   /* wait until recvs and sends of a-array are completed */
5350   i = nrecvs;
5351   while (i--) {
5352     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5353   }
5354   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5355   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5356 
5357   if (scall == MAT_INITIAL_MATRIX){
5358     /* put together the new matrix */
5359     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5360 
5361     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5362     /* Since these are PETSc arrays, change flags to free them as necessary. */
5363     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5364     b_oth->free_a  = PETSC_TRUE;
5365     b_oth->free_ij = PETSC_TRUE;
5366     b_oth->nonew   = 0;
5367 
5368     ierr = PetscFree(bufj);CHKERRQ(ierr);
5369     if (!startsj_s || !bufa_ptr){
5370       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5371       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the locally allocated buffer, which is not returned to the caller */
5372     } else {
5373       *startsj_s = sstartsj;
5374       *startsj_r = rstartsj;
5375       *bufa_ptr  = bufa;
5376     }
5377   }
5378   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5379   PetscFunctionReturn(0);
5380 }
5381 
5382 #undef __FUNCT__
5383 #define __FUNCT__ "MatGetCommunicationStructs"
5384 /*@C
5385   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5386 
5387   Not Collective
5388 
5389   Input Parameter:
5390 . A - The matrix in MPIAIJ format
5391 
5392   Output Parameters:
5393 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5394 . colmap - A map from global column index to local index into lvec
5395 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5396 
5397   Level: developer
5398 
5399 @*/
5400 #if defined (PETSC_USE_CTABLE)
5401 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5402 #else
5403 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5404 #endif
5405 {
5406   Mat_MPIAIJ *a;
5407 
5408   PetscFunctionBegin;
5409   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5410   PetscValidPointer(lvec, 2);
5411   PetscValidPointer(colmap, 3);
5412   PetscValidPointer(multScatter, 4);
5413   a = (Mat_MPIAIJ *) A->data;
5414   if (lvec) *lvec = a->lvec;
5415   if (colmap) *colmap = a->colmap;
5416   if (multScatter) *multScatter = a->Mvctx;
5417   PetscFunctionReturn(0);
5418 }
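/*
   A minimal usage sketch for MatGetCommunicationStructs() (illustrative only, not part of the
   original source; when PETSC_USE_CTABLE is defined, colmap is a PetscTable instead):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterView(Mvctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   The returned objects are internal to the matrix and must not be destroyed by the caller.
*/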
5419 
5420 EXTERN_C_BEGIN
5421 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5422 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5423 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5424 EXTERN_C_END
5425 
5426 #undef __FUNCT__
5427 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5428 /*
5429     Computes C = A*B as (B'*A')', since a direct dense-times-sparse product kernel is not available
5430 
5431                n                       p                          p
5432         (              )       (              )         (                  )
5433       m (      A       )  *  n (       B      )   =   m (         C        )
5434         (              )       (              )         (                  )
5435 
5436 */
5437 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5438 {
5439   PetscErrorCode     ierr;
5440   Mat                At,Bt,Ct;
5441 
5442   PetscFunctionBegin;
5443   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5444   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5445   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5446   ierr = MatDestroy(&At);CHKERRQ(ierr);
5447   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5448   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5449   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5450   PetscFunctionReturn(0);
5451 }
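/*
   A usage sketch (illustrative only, not part of the original source): for these types a user
   reaches the routines above and below through the generic interface, e.g.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);   // A is MATMPIDENSE, B is MATMPIAIJ

   which ends up in MatMatMult_MPIDense_MPIAIJ() below, registered under the name
   "MatMatMult_mpidense_mpiaij_C" in MatCreate_MPIAIJ().
*/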
5452 
5453 #undef __FUNCT__
5454 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5455 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5456 {
5457   PetscErrorCode ierr;
5458   PetscInt       m=A->rmap->n,n=B->cmap->n;
5459   Mat            Cmat;
5460 
5461   PetscFunctionBegin;
5462   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5463   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5464   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5465   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5466   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5467   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5468   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5469   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5470   *C   = Cmat;
5471   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5472   PetscFunctionReturn(0);
5473 }
5474 
5475 /* ----------------------------------------------------------------*/
5476 #undef __FUNCT__
5477 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5478 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5479 {
5480   PetscErrorCode ierr;
5481 
5482   PetscFunctionBegin;
5483   if (scall == MAT_INITIAL_MATRIX){
5484     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5485   }
5486   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5487   PetscFunctionReturn(0);
5488 }
5489 
5490 EXTERN_C_BEGIN
5491 #if defined(PETSC_HAVE_MUMPS)
5492 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5493 #endif
5494 #if defined(PETSC_HAVE_PASTIX)
5495 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5496 #endif
5497 #if defined(PETSC_HAVE_SUPERLU_DIST)
5498 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5499 #endif
5500 #if defined(PETSC_HAVE_SPOOLES)
5501 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5502 #endif
5503 EXTERN_C_END
5504 
5505 /*MC
5506    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5507 
5508    Options Database Keys:
5509 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5510 
5511   Level: beginner
5512 
5513 .seealso: MatCreateAIJ()
5514 M*/
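/*
   A minimal usage sketch (illustrative only, not part of the original source): the "mpiaij"
   type is normally selected either explicitly or from the options database, e.g.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);      // or MatSetFromOptions(A) with -mat_type mpiaij
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Here M, N and the preallocation counts (5 nonzeros per row of the diagonal block, 2 per row
   of the off-diagonal block) are placeholder values.
*/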
5515 
5516 EXTERN_C_BEGIN
5517 #undef __FUNCT__
5518 #define __FUNCT__ "MatCreate_MPIAIJ"
5519 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5520 {
5521   Mat_MPIAIJ     *b;
5522   PetscErrorCode ierr;
5523   PetscMPIInt    size;
5524 
5525   PetscFunctionBegin;
5526   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5527 
5528   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5529   B->data         = (void*)b;
5530   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5531   B->assembled    = PETSC_FALSE;
5532 
5533   B->insertmode   = NOT_SET_VALUES;
5534   b->size         = size;
5535   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5536 
5537   /* build cache for off array entries formed */
5538   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5539   b->donotstash  = PETSC_FALSE;
5540   b->colmap      = 0;
5541   b->garray      = 0;
5542   b->roworiented = PETSC_TRUE;
5543 
5544   /* stuff used for matrix vector multiply */
5545   b->lvec      = PETSC_NULL;
5546   b->Mvctx     = PETSC_NULL;
5547 
5548   /* stuff for MatGetRow() */
5549   b->rowindices   = 0;
5550   b->rowvalues    = 0;
5551   b->getrowactive = PETSC_FALSE;
5552 
5553 #if defined(PETSC_HAVE_SPOOLES)
5554   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5555                                      "MatGetFactor_mpiaij_spooles",
5556                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5557 #endif
5558 #if defined(PETSC_HAVE_MUMPS)
5559   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5560                                      "MatGetFactor_aij_mumps",
5561                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5562 #endif
5563 #if defined(PETSC_HAVE_PASTIX)
5564   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5565                                      "MatGetFactor_mpiaij_pastix",
5566                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5567 #endif
5568 #if defined(PETSC_HAVE_SUPERLU_DIST)
5569   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5570                                      "MatGetFactor_mpiaij_superlu_dist",
5571                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5572 #endif
5573   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5574                                      "MatStoreValues_MPIAIJ",
5575                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5577                                      "MatRetrieveValues_MPIAIJ",
5578                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5579   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5580                                      "MatGetDiagonalBlock_MPIAIJ",
5581                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5582   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5583                                      "MatIsTranspose_MPIAIJ",
5584                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5586                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5587                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5589                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5590                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5592                                      "MatDiagonalScaleLocal_MPIAIJ",
5593                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5595                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5596                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5598                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5599                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5601                                      "MatConvert_MPIAIJ_MPISBAIJ",
5602                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5604                                      "MatMatMult_MPIDense_MPIAIJ",
5605                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5607                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5608                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5610                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5611                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5613   PetscFunctionReturn(0);
5614 }
5615 EXTERN_C_END
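/*
   The PetscObjectComposeFunctionDynamic() calls above are what make routines such as
   MatMPIAIJSetPreallocation() work on this type: the generic interface looks the
   implementation up by name at run time. A sketch of that lookup pattern (illustrative only;
   d_nz, d_nnz, o_nz, o_nnz are placeholder arguments):

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
     ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); }
*/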
5616 
5617 #undef __FUNCT__
5618 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5619 /*@
5620      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5621          and "off-diagonal" part of the matrix in CSR format.
5622 
5623    Collective on MPI_Comm
5624 
5625    Input Parameters:
5626 +  comm - MPI communicator
5627 .  m - number of local rows (Cannot be PETSC_DECIDE)
5628 .  n - This value should be the same as the local size used in creating the
5629        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5630        it calculated if N is given). For square matrices n is almost always m.
5631 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5632 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5633 .   i - row indices for "diagonal" portion of matrix
5634 .   j - column indices for "diagonal" portion of matrix
5635 .   a - matrix values for "diagonal" portion of matrix
5636 .   oi - row indices for "off-diagonal" portion of matrix
5637 .   oj - column indices for "off-diagonal" portion of matrix
5638 -   oa - matrix values for "off-diagonal" portion of matrix
5639 
5640    Output Parameter:
5641 .   mat - the matrix
5642 
5643    Level: advanced
5644 
5645    Notes:
5646        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5647        must free the arrays once the matrix has been destroyed and not before.
5648 
5649        The i and j indices are 0 based
5650 
5651        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5652 
5653        This sets local rows and cannot be used to set off-processor values.
5654 
5655        You cannot later use MatSetValues() to change values in this matrix.
5656 
5657 .keywords: matrix, aij, compressed row, sparse, parallel
5658 
5659 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5660           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5661 @*/
5662 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5663 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5664 {
5665   PetscErrorCode ierr;
5666   Mat_MPIAIJ     *maij;
5667 
5668   PetscFunctionBegin;
5669   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5670   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5671   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5672   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5673   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5674   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5675   maij = (Mat_MPIAIJ*) (*mat)->data;
5676   maij->donotstash     = PETSC_TRUE;
5677   (*mat)->preallocated = PETSC_TRUE;
5678 
5679   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5680   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5681 
5682   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5683   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5684 
5685   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5688   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689 
5690   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   PetscFunctionReturn(0);
5693 }
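/*
   A minimal usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only, not part of
   the original source): each process contributes one row whose only nonzero lies in the
   "diagonal" block, and the "off-diagonal" block is empty.

     PetscInt    i[]  = {0,1};  PetscInt j[]  = {0};  PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,0};  PetscInt oj[1];       PetscScalar oa[1];   // no off-diagonal entries
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The arrays must remain valid until the matrix is destroyed, since they are used in place
   and are not copied.
*/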
5694 
5695 /*
5696     Special version for direct calls from Fortran
5697 */
5698 #include <petsc-private/fortranimpl.h>
5699 
5700 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5701 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5702 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5703 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5704 #endif
5705 
5706 /* Change these macros so can be used in void function */
5707 #undef CHKERRQ
5708 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5709 #undef SETERRQ2
5710 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5711 #undef SETERRQ3
5712 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5713 #undef SETERRQ
5714 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5715 
5716 EXTERN_C_BEGIN
5717 #undef __FUNCT__
5718 #define __FUNCT__ "matsetvaluesmpiaij_"
5719 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5720 {
5721   Mat             mat = *mmat;
5722   PetscInt        m = *mm, n = *mn;
5723   InsertMode      addv = *maddv;
5724   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5725   PetscScalar     value;
5726   PetscErrorCode  ierr;
5727 
5728   MatCheckPreallocated(mat,1);
5729   if (mat->insertmode == NOT_SET_VALUES) {
5730     mat->insertmode = addv;
5731   }
5732 #if defined(PETSC_USE_DEBUG)
5733   else if (mat->insertmode != addv) {
5734     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5735   }
5736 #endif
5737   {
5738   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5739   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5740   PetscBool       roworiented = aij->roworiented;
5741 
5742   /* Some Variables required in the macro */
5743   Mat             A = aij->A;
5744   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5745   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5746   MatScalar       *aa = a->a;
5747   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5748   Mat             B = aij->B;
5749   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5750   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5751   MatScalar       *ba = b->a;
5752 
5753   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5754   PetscInt        nonew = a->nonew;
5755   MatScalar       *ap1,*ap2;
5756 
5757   PetscFunctionBegin;
5758   for (i=0; i<m; i++) {
5759     if (im[i] < 0) continue;
5760 #if defined(PETSC_USE_DEBUG)
5761     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5762 #endif
5763     if (im[i] >= rstart && im[i] < rend) {
5764       row      = im[i] - rstart;
5765       lastcol1 = -1;
5766       rp1      = aj + ai[row];
5767       ap1      = aa + ai[row];
5768       rmax1    = aimax[row];
5769       nrow1    = ailen[row];
5770       low1     = 0;
5771       high1    = nrow1;
5772       lastcol2 = -1;
5773       rp2      = bj + bi[row];
5774       ap2      = ba + bi[row];
5775       rmax2    = bimax[row];
5776       nrow2    = bilen[row];
5777       low2     = 0;
5778       high2    = nrow2;
5779 
5780       for (j=0; j<n; j++) {
5781         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5782         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5783         if (in[j] >= cstart && in[j] < cend){
5784           col = in[j] - cstart;
5785           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5786         } else if (in[j] < 0) continue;
5787 #if defined(PETSC_USE_DEBUG)
5788         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5789 #endif
5790         else {
5791           if (mat->was_assembled) {
5792             if (!aij->colmap) {
5793               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5794             }
5795 #if defined (PETSC_USE_CTABLE)
5796             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5797 	    col--;
5798 #else
5799             col = aij->colmap[in[j]] - 1;
5800 #endif
5801             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5802               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5803               col =  in[j];
5804               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5805               B = aij->B;
5806               b = (Mat_SeqAIJ*)B->data;
5807               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5808               rp2      = bj + bi[row];
5809               ap2      = ba + bi[row];
5810               rmax2    = bimax[row];
5811               nrow2    = bilen[row];
5812               low2     = 0;
5813               high2    = nrow2;
5814               bm       = aij->B->rmap->n;
5815               ba = b->a;
5816             }
5817           } else col = in[j];
5818           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5819         }
5820       }
5821     } else {
5822       if (!aij->donotstash) {
5823         if (roworiented) {
5824           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5825         } else {
5826           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5827         }
5828       }
5829     }
5830   }}
5831   PetscFunctionReturnVoid();
5832 }
5833 EXTERN_C_END
5834 
5835