xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 03a0fabf4bd3dfd189b0d5f539b470107c938ba0)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically switches over to using
18    inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
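/*
   Example usage (a minimal sketch, not part of this implementation file): creating an AIJ matrix and
   calling both preallocation routines as recommended above, so the same code works with one process
   (MATSEQAIJ) and with several (MATMPIAIJ).  The global size (100 x 100) and the per-row nonzero
   estimates (5 in the diagonal block, 2 in the off-diagonal block) are illustrative assumptions only.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   The preallocation call that does not apply to the actual matrix type is ignored, which is why
   calling both is safe.
*/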
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
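/*
   Example usage (a minimal sketch; A is assumed to be a matrix that has already been created and sized):
   the "aijcrl" format is typically selected from the options database rather than hard-coded,

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   combined with the command line option -mat_type aijcrl.
*/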
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the numbers of diagonal and off-diagonal entries in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the numbers of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0 */
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
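/*
   Example usage of MatDistribute_MPIAIJ() (a minimal sketch; gmat is a square MATSEQAIJ matrix whose
   data is only accessed on process 0, and m is the number of rows this process is to own):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   A subsequent call with MAT_REUSE_MATRIX and the same dist only moves the numerical values from
   process 0 into the already established nonzero structure.
*/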
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350   number to the local number in the off-diagonal part of the local
351   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352   a slightly higher hash table cost; without it it is not scalable (each processor
353   holds an order N integer array) but is fast to access.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
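/*
   For reference, the colmap built above is consulted later in this file roughly as follows
   (a sketch mirroring MatSetValues_MPIAIJ(); gcol stands for a global column index and col for the
   resulting local index into the off-diagonal block, with col < 0 meaning the column is not present):

#if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
#else
     col = aij->colmap[gcol] - 1;
#endif
*/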
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so, we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
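  /* For example, if was_assembled is 1 on two processes and 0 on a third, the MPI_PROD reduction
     below yields other_disassembled = 0, so the two still-assembled processes call
     MatDisAssemble_MPIAIJ() as well (illustrative values only). */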
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
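  /* For example, with size = 3 and nprocs = {2,1, 0,0, 3,1} (2 rows going to process 0, none to
     process 1, 3 to process 2) the loop below produces starts = {0,2,2}, so the rows destined for
     process 2 are packed into svalues beginning at offset 2 (illustrative values only). */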
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal, we use separate
864      code so that MatSetValues() is not called for each diagonal entry;
865      that would allocate new memory for every entry, causing lots of
866      mallocs and slowing things down.
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the VecScatterEnd() below */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: compare the diagonal blocks, MatIsTranspose(Adia,Bdia) */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
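/*
   Example (sketch): a typical call sequence that reaches MatView_MPIAIJ_Binary; the file name
   is illustrative.  Rank 0 writes the header and its own rows, then drains every other rank in
   turn under the PetscViewerFlowControl protocol used above.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
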
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscInt   *inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
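/*
   Example (sketch): one local symmetric SOR sweep through the public interface; A is an
   assembled MATMPIAIJ matrix and b, x are conforming vectors (names are illustrative).
   Fully coupled parallel sweeps are rejected above with PETSC_ERR_SUP.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/
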
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   Mat            aA,aB,Aperm;
1725   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1726   PetscScalar    *aa,*ba;
1727   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1728   PetscSF        rowsf,sf;
1729   IS             parcolp = PETSC_NULL;
1730   PetscBool      done;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1735   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1738 
1739   /* Invert row permutation to find out where my rows should go */
1740   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1741   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1742   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1743   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1744   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745 
1746   /* Invert column permutation to find out where my columns should go */
1747   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1748   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1749   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1750   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1751   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1752   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1753 
1754   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1755   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1756   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1757 
1758   /* Find out where my gcols should go */
1759   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1760   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1761   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1762   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1763   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1764   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1765   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766 
1767   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1768   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1769   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1770   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1771   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1772   for (i=0; i<m; i++) {
1773     PetscInt row = rdest[i],rowner;
1774     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1775     for (j=ai[i]; j<ai[i+1]; j++) {
1776       PetscInt cowner,col = cdest[aj[j]];
1777       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1778       if (rowner == cowner) dnnz[i]++;
1779       else onnz[i]++;
1780     }
1781     for (j=bi[i]; j<bi[i+1]; j++) {
1782       PetscInt cowner,col = gcdest[bj[j]];
1783       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1784       if (rowner == cowner) dnnz[i]++;
1785       else onnz[i]++;
1786     }
1787   }
1788   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1789   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1790   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1791   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1792   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1793   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1794   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1795 
1796     ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1797   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1798   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1799   for (i=0; i<m; i++) {
1800     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1801     PetscInt rowlen;
1802     rowlen = ai[i+1] - ai[i];
1803     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1804     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1805     rowlen = bi[i+1] - bi[i];
1806     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1808   }
1809   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1812   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1813   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1814   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1815   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1816   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1817   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1818   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1819   *B = Aperm;
1820   PetscFunctionReturn(0);
1821 }
1822 
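/*
   Example (sketch): permuting a parallel AIJ matrix through the public interface; rowp and
   colp are index sets whose global sizes match the matrix dimensions (names are illustrative).
   The routine above inverts both permutations with PetscSF reductions so each process learns
   the destinations of its rows, columns, and ghost columns before preallocating the result.

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/
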
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1826 {
1827   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1828   Mat            A = mat->A,B = mat->B;
1829   PetscErrorCode ierr;
1830   PetscReal      isend[5],irecv[5];
1831 
1832   PetscFunctionBegin;
1833   info->block_size     = 1.0;
1834   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1835   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1836   isend[3] = info->memory;  isend[4] = info->mallocs;
1837   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1839   isend[3] += info->memory;  isend[4] += info->mallocs;
1840   if (flag == MAT_LOCAL) {
1841     info->nz_used      = isend[0];
1842     info->nz_allocated = isend[1];
1843     info->nz_unneeded  = isend[2];
1844     info->memory       = isend[3];
1845     info->mallocs      = isend[4];
1846   } else if (flag == MAT_GLOBAL_MAX) {
1847     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1848     info->nz_used      = irecv[0];
1849     info->nz_allocated = irecv[1];
1850     info->nz_unneeded  = irecv[2];
1851     info->memory       = irecv[3];
1852     info->mallocs      = irecv[4];
1853   } else if (flag == MAT_GLOBAL_SUM) {
1854     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1855     info->nz_used      = irecv[0];
1856     info->nz_allocated = irecv[1];
1857     info->nz_unneeded  = irecv[2];
1858     info->memory       = irecv[3];
1859     info->mallocs      = irecv[4];
1860   }
1861   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1862   info->fill_ratio_needed = 0;
1863   info->factor_mallocs    = 0;
1864   PetscFunctionReturn(0);
1865 }
1866 
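/*
   Example (sketch): querying the counts accumulated above, summed over all processes.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
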
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetOption_MPIAIJ"
1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1870 {
1871   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1872   PetscErrorCode ierr;
1873 
1874   PetscFunctionBegin;
1875   switch (op) {
1876   case MAT_NEW_NONZERO_LOCATIONS:
1877   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1878   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1879   case MAT_KEEP_NONZERO_PATTERN:
1880   case MAT_NEW_NONZERO_LOCATION_ERR:
1881   case MAT_USE_INODES:
1882   case MAT_IGNORE_ZERO_ENTRIES:
1883     MatCheckPreallocated(A,1);
1884     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1885     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1886     break;
1887   case MAT_ROW_ORIENTED:
1888     a->roworiented = flg;
1889     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1890     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1891     break;
1892   case MAT_NEW_DIAGONALS:
1893     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1894     break;
1895   case MAT_IGNORE_OFF_PROC_ENTRIES:
1896     a->donotstash = flg;
1897     break;
1898   case MAT_SPD:
1899     A->spd_set                         = PETSC_TRUE;
1900     A->spd                             = flg;
1901     if (flg) {
1902       A->symmetric                     = PETSC_TRUE;
1903       A->structurally_symmetric        = PETSC_TRUE;
1904       A->symmetric_set                 = PETSC_TRUE;
1905       A->structurally_symmetric_set    = PETSC_TRUE;
1906     }
1907     break;
1908   case MAT_SYMMETRIC:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   case MAT_STRUCTURALLY_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_HERMITIAN:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_SYMMETRY_ETERNAL:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   default:
1921     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
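/*
   Example (sketch): options commonly set on an MPIAIJ matrix before assembly; the first is
   handled here directly and the second is forwarded to the per-process blocks, as shown above.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/
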
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatGetRow_MPIAIJ"
1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1929 {
1930   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1931   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1932   PetscErrorCode ierr;
1933   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1934   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1935   PetscInt       *cmap,*idx_p;
1936 
1937   PetscFunctionBegin;
1938   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1939   mat->getrowactive = PETSC_TRUE;
1940 
1941   if (!mat->rowvalues && (idx || v)) {
1942     /*
1943         allocate enough space to hold information from the longest row.
1944     */
1945     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1946     PetscInt   max = 1,tmp;
1947     for (i=0; i<matin->rmap->n; i++) {
1948       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1949       if (max < tmp) { max = tmp; }
1950     }
1951     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1952   }
1953 
1954   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1955   lrow = row - rstart;
1956 
1957   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1958   if (!v)   {pvA = 0; pvB = 0;}
1959   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1960   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1961   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1962   nztot = nzA + nzB;
1963 
1964   cmap  = mat->garray;
1965   if (v  || idx) {
1966     if (nztot) {
1967       /* Sort by increasing column numbers, assuming A and B already sorted */
1968       PetscInt imark = -1;
1969       if (v) {
1970         *v = v_p = mat->rowvalues;
1971         for (i=0; i<nzB; i++) {
1972           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1973           else break;
1974         }
1975         imark = i;
1976         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1977         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1978       }
1979       if (idx) {
1980         *idx = idx_p = mat->rowindices;
1981         if (imark > -1) {
1982           for (i=0; i<imark; i++) {
1983             idx_p[i] = cmap[cworkB[i]];
1984           }
1985         } else {
1986           for (i=0; i<nzB; i++) {
1987             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1988             else break;
1989           }
1990           imark = i;
1991         }
1992         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1993         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1994       }
1995     } else {
1996       if (idx) *idx = 0;
1997       if (v)   *v   = 0;
1998     }
1999   }
2000   *nz = nztot;
2001   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2002   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2009 {
2010   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2011 
2012   PetscFunctionBegin;
2013   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2014   aij->getrowactive = PETSC_FALSE;
2015   PetscFunctionReturn(0);
2016 }
2017 
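/*
   Example (sketch): iterating over the locally owned rows; the routine above merges the
   diagonal and off-diagonal blocks so cols[] holds sorted global column indices.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
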
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatNorm_MPIAIJ"
2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2021 {
2022   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2023   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2024   PetscErrorCode ierr;
2025   PetscInt       i,j,cstart = mat->cmap->rstart;
2026   PetscReal      sum = 0.0;
2027   MatScalar      *v;
2028 
2029   PetscFunctionBegin;
2030   if (aij->size == 1) {
2031     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2032   } else {
2033     if (type == NORM_FROBENIUS) {
2034       v = amat->a;
2035       for (i=0; i<amat->nz; i++) {
2036 #if defined(PETSC_USE_COMPLEX)
2037         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2038 #else
2039         sum += (*v)*(*v); v++;
2040 #endif
2041       }
2042       v = bmat->a;
2043       for (i=0; i<bmat->nz; i++) {
2044 #if defined(PETSC_USE_COMPLEX)
2045         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2046 #else
2047         sum += (*v)*(*v); v++;
2048 #endif
2049       }
2050       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2051       *norm = PetscSqrtReal(*norm);
2052     } else if (type == NORM_1) { /* max column norm */
2053       PetscReal *tmp,*tmp2;
2054       PetscInt  *jj,*garray = aij->garray;
2055       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2056       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2057       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2058       *norm = 0.0;
2059       v = amat->a; jj = amat->j;
2060       for (j=0; j<amat->nz; j++) {
2061         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2062       }
2063       v = bmat->a; jj = bmat->j;
2064       for (j=0; j<bmat->nz; j++) {
2065         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2066       }
2067       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2068       for (j=0; j<mat->cmap->N; j++) {
2069         if (tmp2[j] > *norm) *norm = tmp2[j];
2070       }
2071       ierr = PetscFree(tmp);CHKERRQ(ierr);
2072       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2073     } else if (type == NORM_INFINITY) { /* max row norm */
2074       PetscReal ntemp = 0.0;
2075       for (j=0; j<aij->A->rmap->n; j++) {
2076         v = amat->a + amat->i[j];
2077         sum = 0.0;
2078         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2079           sum += PetscAbsScalar(*v); v++;
2080         }
2081         v = bmat->a + bmat->i[j];
2082         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2083           sum += PetscAbsScalar(*v); v++;
2084         }
2085         if (sum > ntemp) ntemp = sum;
2086       }
2087       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2088     } else {
2089       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2090     }
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
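/*
   Example (sketch): the three norms supported by the routine above; NORM_2 is not available
   for parallel AIJ matrices.

     PetscReal nrm1,nrmf,nrmi;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          largest column sum
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrmi);CHKERRQ(ierr);   largest row sum
*/
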
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatTranspose_MPIAIJ"
2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2098 {
2099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2100   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2101   PetscErrorCode ierr;
2102   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2103   PetscInt       cstart=A->cmap->rstart,ncol;
2104   Mat            B;
2105   MatScalar      *array;
2106 
2107   PetscFunctionBegin;
2108   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2109 
2110   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2111   ai = Aloc->i; aj = Aloc->j;
2112   bi = Bloc->i; bj = Bloc->j;
2113   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2114     PetscInt *d_nnz,*g_nnz,*o_nnz;
2115     PetscSFNode *oloc;
2116     PetscSF sf;
2117 
2118     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2119     /* compute d_nnz for preallocation */
2120     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2121     for (i=0; i<ai[ma]; i++) {
2122       d_nnz[aj[i]] ++;
2123       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2124     }
2125     /* compute local off-diagonal contributions */
2126     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2127     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2128     /* map those to global */
2129     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2130     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2131     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2132     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2133     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2134     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2135 
2136     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2137     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2138     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2139     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2140     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2141     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2142   } else {
2143     B = *matout;
2144     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2145     for (i=0; i<ai[ma]; i++){
2146       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2147     }
2148   }
2149 
2150   /* copy over the A part */
2151   array = Aloc->a;
2152   row = A->rmap->rstart;
2153   for (i=0; i<ma; i++) {
2154     ncol = ai[i+1]-ai[i];
2155     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2156     row++; array += ncol; aj += ncol;
2157   }
2158   aj = Aloc->j;
2159   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2160 
2161   /* copy over the B part */
2162   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2163   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2164   array = Bloc->a;
2165   row = A->rmap->rstart;
2166   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2167   cols_tmp = cols;
2168   for (i=0; i<mb; i++) {
2169     ncol = bi[i+1]-bi[i];
2170     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2171     row++; array += ncol; cols_tmp += ncol;
2172   }
2173   ierr = PetscFree(cols);CHKERRQ(ierr);
2174 
2175   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2178     *matout = B;
2179   } else {
2180     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2181   }
2182   PetscFunctionReturn(0);
2183 }
2184 
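/*
   Example (sketch): out-of-place and in-place transposes dispatched to the routine above;
   the in-place form requires a square matrix, as checked at the top of the routine.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/
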
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2187 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2188 {
2189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2190   Mat            a = aij->A,b = aij->B;
2191   PetscErrorCode ierr;
2192   PetscInt       s1,s2,s3;
2193 
2194   PetscFunctionBegin;
2195   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2196   if (rr) {
2197     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2198     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2199     /* Overlap communication with computation. */
2200     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2201   }
2202   if (ll) {
2203     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2204     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2205     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2206   }
2207   /* scale  the diagonal block */
2208   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2209 
2210   if (rr) {
2211     /* Do a scatter end and then right scale the off-diagonal block */
2212     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2213     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2214   }
2215 
2216   PetscFunctionReturn(0);
2217 }
2218 
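/*
   Example (sketch): symmetric diagonal scaling A <- diag(d)*A*diag(d) of a square matrix;
   the left vector must conform to the row layout and the right vector to the column layout.
   Passing PETSC_NULL for either vector skips that side, as handled above.

     ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
*/
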
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2221 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2222 {
2223   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2224   PetscErrorCode ierr;
2225 
2226   PetscFunctionBegin;
2227   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatEqual_MPIAIJ"
2233 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2234 {
2235   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2236   Mat            a,b,c,d;
2237   PetscBool      flg;
2238   PetscErrorCode ierr;
2239 
2240   PetscFunctionBegin;
2241   a = matA->A; b = matA->B;
2242   c = matB->A; d = matB->B;
2243 
2244   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2245   if (flg) {
2246     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2247   }
2248   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatCopy_MPIAIJ"
2254 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2255 {
2256   PetscErrorCode ierr;
2257   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2258   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2259 
2260   PetscFunctionBegin;
2261   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2262   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2263     /* because of the column compression in the off-processor part of the matrix a->B,
2264        the number of columns in a->B and b->B may be different, hence we cannot call
2265        the MatCopy() directly on the two parts. If need be, we can provide a more
2266        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2267        then copying the submatrices */
2268     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2269   } else {
2270     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2271     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2272   }
2273   PetscFunctionReturn(0);
2274 }
2275 
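/*
   Example (sketch): copying values between two assembled MPIAIJ matrices; the per-block fast
   path above is used only when both share the same nonzero pattern and copy implementation.

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
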
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatSetUp_MPIAIJ"
2278 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2279 {
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2289 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2290 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2291 {
2292   PetscInt          i,m=Y->rmap->N;
2293   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2294   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2295   const PetscInt    *xi = x->i,*yi = y->i;
2296 
2297   PetscFunctionBegin;
2298   /* Set the number of nonzeros in the new matrix */
2299   for (i=0; i<m; i++) {
2300     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2301     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2302     nnz[i] = 0;
2303     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2304       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2305       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2306       nnz[i]++;
2307     }
2308     for (; k<nzy; k++) nnz[i]++;
2309   }
2310   PetscFunctionReturn(0);
2311 }
2312 
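/*
   Worked example (sketch) for the merge above: if, after mapping through yltog and xltog,
   row i of Y has global columns {0,3,7} and row i of X has {3,5}, the two sorted lists merge
   to {0,3,5,7}, so nnz[i] = 4; the shared column 3 is counted once because k is advanced
   past the duplicate.
*/
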
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatAXPY_MPIAIJ"
2315 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2316 {
2317   PetscErrorCode ierr;
2318   PetscInt       i;
2319   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2320   PetscBLASInt   bnz,one=1;
2321   Mat_SeqAIJ     *x,*y;
2322 
2323   PetscFunctionBegin;
2324   if (str == SAME_NONZERO_PATTERN) {
2325     PetscScalar alpha = a;
2326     x = (Mat_SeqAIJ *)xx->A->data;
2327     y = (Mat_SeqAIJ *)yy->A->data;
2328     bnz = PetscBLASIntCast(x->nz);
2329     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2330     x = (Mat_SeqAIJ *)xx->B->data;
2331     y = (Mat_SeqAIJ *)yy->B->data;
2332     bnz = PetscBLASIntCast(x->nz);
2333     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2334   } else if (str == SUBSET_NONZERO_PATTERN) {
2335     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2336 
2337     x = (Mat_SeqAIJ *)xx->B->data;
2338     y = (Mat_SeqAIJ *)yy->B->data;
2339     if (y->xtoy && y->XtoY != xx->B) {
2340       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2341       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2342     }
2343     if (!y->xtoy) { /* get xtoy */
2344       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2345       y->XtoY = xx->B;
2346       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2347     }
2348     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2349   } else {
2350     Mat B;
2351     PetscInt *nnz_d,*nnz_o;
2352     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2353     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2354     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2355     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2356     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2357     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2358     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2359     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2360     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2361     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2362     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2363     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2364     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2365     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2366   }
2367   PetscFunctionReturn(0);
2368 }
2369 
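/*
   Example (sketch): Y <- a*X + Y through the public interface; SAME_NONZERO_PATTERN takes the
   BLAS axpy fast path above, SUBSET_NONZERO_PATTERN reuses the cached xtoy map, and
   DIFFERENT_NONZERO_PATTERN rebuilds Y with the preallocation computed above.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
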
2370 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatConjugate_MPIAIJ"
2374 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2375 {
2376 #if defined(PETSC_USE_COMPLEX)
2377   PetscErrorCode ierr;
2378   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2379 
2380   PetscFunctionBegin;
2381   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2382   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2383 #else
2384   PetscFunctionBegin;
2385 #endif
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatRealPart_MPIAIJ"
2391 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2392 {
2393   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2398   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2404 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2405 {
2406   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2407   PetscErrorCode ierr;
2408 
2409   PetscFunctionBegin;
2410   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2411   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #if defined(PETSC_HAVE_PBGL)
2416 
2417 #include <boost/parallel/mpi/bsp_process_group.hpp>
2418 #include <boost/graph/distributed/ilu_default_graph.hpp>
2419 #include <boost/graph/distributed/ilu_0_block.hpp>
2420 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2421 #include <boost/graph/distributed/petsc/interface.hpp>
2422 #include <boost/multi_array.hpp>
2423 #include <boost/parallel/distributed_property_map.hpp>
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2427 /*
2428   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2429 */
2430 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2431 {
2432   namespace petsc = boost::distributed::petsc;
2433 
2434   namespace graph_dist = boost::graph::distributed;
2435   using boost::graph::distributed::ilu_default::process_group_type;
2436   using boost::graph::ilu_permuted;
2437 
2438   PetscBool       row_identity, col_identity;
2439   PetscContainer  c;
2440   PetscInt        m, n, M, N;
2441   PetscErrorCode  ierr;
2442 
2443   PetscFunctionBegin;
2444   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2445   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2446   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2447   if (!row_identity || !col_identity) {
2448     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2449   }
2450 
2451   process_group_type pg;
2452   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2453   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2454   lgraph_type&   level_graph = *lgraph_p;
2455   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2456 
2457   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2458   ilu_permuted(level_graph);
2459 
2460   /* put together the new matrix */
2461   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2462   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2463   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2464   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2465   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2466   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2467   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2468   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2469 
2470   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2471   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2472   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2473   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2479 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2480 {
2481   PetscFunctionBegin;
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatSolve_MPIAIJ"
2487 /*
2488   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2489 */
2490 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2491 {
2492   namespace graph_dist = boost::graph::distributed;
2493 
2494   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2495   lgraph_type*   lgraph_p;
2496   PetscContainer c;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2501   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2502   ierr = VecCopy(b, x);CHKERRQ(ierr);
2503 
2504   PetscScalar* array_x;
2505   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2506   PetscInt sx;
2507   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2508 
2509   PetscScalar* array_b;
2510   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2511   PetscInt sb;
2512   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2513 
2514   lgraph_type&   level_graph = *lgraph_p;
2515   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2516 
2517   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2518   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2519                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2520 
2521   typedef boost::iterator_property_map<array_ref_type::iterator,
2522                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2523   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2524                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2525 
2526   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2527 
2528   PetscFunctionReturn(0);
2529 }
2530 #endif
2531 
2532 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2533   PetscInt       nzlocal,nsends,nrecvs;
2534   PetscMPIInt    *send_rank,*recv_rank;
2535   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2536   PetscScalar    *sbuf_a,**rbuf_a;
2537   PetscErrorCode (*Destroy)(Mat);
2538 } Mat_Redundant;
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2542 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2543 {
2544   PetscErrorCode       ierr;
2545   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2546   PetscInt             i;
2547 
2548   PetscFunctionBegin;
2549   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2550   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2551   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2552   for (i=0; i<redund->nrecvs; i++){
2553     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2554     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2555   }
2556   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2557   ierr = PetscFree(redund);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatDestroy_MatRedundant"
2563 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2564 {
2565   PetscErrorCode  ierr;
2566   PetscContainer  container;
2567   Mat_Redundant   *redund=PETSC_NULL;
2568 
2569   PetscFunctionBegin;
2570   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2571   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2572   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2573   A->ops->destroy = redund->Destroy;
2574   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2575   if (A->ops->destroy) {
2576     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2577   }
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2583 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2584 {
2585   PetscMPIInt    rank,size;
2586   MPI_Comm       comm=((PetscObject)mat)->comm;
2587   PetscErrorCode ierr;
2588   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2589   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2590   PetscInt       *rowrange=mat->rmap->range;
2591   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2592   Mat            A=aij->A,B=aij->B,C=*matredundant;
2593   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2594   PetscScalar    *sbuf_a;
2595   PetscInt       nzlocal=a->nz+b->nz;
2596   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2597   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2598   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2599   MatScalar      *aworkA,*aworkB;
2600   PetscScalar    *vals;
2601   PetscMPIInt    tag1,tag2,tag3,imdex;
2602   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2603                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2604   MPI_Status     recv_status,*send_status;
2605   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2606   PetscInt       **rbuf_j=PETSC_NULL;
2607   PetscScalar    **rbuf_a=PETSC_NULL;
2608   Mat_Redundant  *redund=PETSC_NULL;
2609   PetscContainer container;
2610 
2611   PetscFunctionBegin;
2612   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2613   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2614 
2615   if (reuse == MAT_REUSE_MATRIX) {
2616     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2617     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2618     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2619     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2620     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2621     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2622     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2623     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2624 
2625     nsends    = redund->nsends;
2626     nrecvs    = redund->nrecvs;
2627     send_rank = redund->send_rank;
2628     recv_rank = redund->recv_rank;
2629     sbuf_nz   = redund->sbuf_nz;
2630     rbuf_nz   = redund->rbuf_nz;
2631     sbuf_j    = redund->sbuf_j;
2632     sbuf_a    = redund->sbuf_a;
2633     rbuf_j    = redund->rbuf_j;
2634     rbuf_a    = redund->rbuf_a;
2635   }
2636 
2637   if (reuse == MAT_INITIAL_MATRIX){
2638     PetscMPIInt  subrank,subsize;
2639     PetscInt     nleftover,np_subcomm;
2640     /* get the destination processors' id send_rank, nsends and nrecvs */
2641     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2642     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2643     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2644     np_subcomm = size/nsubcomm;
2645     nleftover  = size - nsubcomm*np_subcomm;
2646     nsends = 0; nrecvs = 0;
2647     for (i=0; i<size; i++){ /* i=rank*/
2648       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2649         send_rank[nsends] = i; nsends++;
2650         recv_rank[nrecvs++] = i;
2651       }
2652     }
2653     if (rank >= size - nleftover){/* this proc is a leftover processor */
2654       i = size-nleftover-1;
2655       j = 0;
2656       while (j < nsubcomm - nleftover){
2657         send_rank[nsends++] = i;
2658         i--; j++;
2659       }
2660     }
2661 
2662     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2663       for (i=0; i<nleftover; i++){
2664         recv_rank[nrecvs++] = size-nleftover+i;
2665       }
2666     }
2667 
2668     /* allocate sbuf_j, sbuf_a */
2669     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2670     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2671     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2672   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2673 
2674   /* copy mat's local entries into the buffers */
2675   if (reuse == MAT_INITIAL_MATRIX){
2676     rownz_max = 0;
2677     rptr = sbuf_j;
2678     cols = sbuf_j + rend-rstart + 1;
2679     vals = sbuf_a;
2680     rptr[0] = 0;
2681     for (i=0; i<rend-rstart; i++){
2682       row = i + rstart;
2683       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2684       ncols  = nzA + nzB;
2685       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2686       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2687       /* load the column indices for this row into cols */
2688       lwrite = 0;
2689       for (l=0; l<nzB; l++) {
2690         if ((ctmp = bmap[cworkB[l]]) < cstart){
2691           vals[lwrite]   = aworkB[l];
2692           cols[lwrite++] = ctmp;
2693         }
2694       }
2695       for (l=0; l<nzA; l++){
2696         vals[lwrite]   = aworkA[l];
2697         cols[lwrite++] = cstart + cworkA[l];
2698       }
2699       for (l=0; l<nzB; l++) {
2700         if ((ctmp = bmap[cworkB[l]]) >= cend){
2701           vals[lwrite]   = aworkB[l];
2702           cols[lwrite++] = ctmp;
2703         }
2704       }
2705       vals += ncols;
2706       cols += ncols;
2707       rptr[i+1] = rptr[i] + ncols;
2708       if (rownz_max < ncols) rownz_max = ncols;
2709     }
2710     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2711   } else { /* only copy matrix values into sbuf_a */
2712     rptr = sbuf_j;
2713     vals = sbuf_a;
2714     rptr[0] = 0;
2715     for (i=0; i<rend-rstart; i++){
2716       row = i + rstart;
2717       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2718       ncols  = nzA + nzB;
2719       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2720       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2721       lwrite = 0;
2722       for (l=0; l<nzB; l++) {
2723         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2724       }
2725       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2726       for (l=0; l<nzB; l++) {
2727         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2728       }
2729       vals += ncols;
2730       rptr[i+1] = rptr[i] + ncols;
2731     }
2732   } /* end of if/else (reuse == MAT_INITIAL_MATRIX) */
2733 
2734   /* send nzlocal to others, and recv other's nzlocal */
2735   /*--------------------------------------------------*/
2736   if (reuse == MAT_INITIAL_MATRIX){
2737     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2738     s_waits2 = s_waits3 + nsends;
2739     s_waits1 = s_waits2 + nsends;
2740     r_waits1 = s_waits1 + nsends;
2741     r_waits2 = r_waits1 + nrecvs;
2742     r_waits3 = r_waits2 + nrecvs;
2743   } else {
2744     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2745     r_waits3 = s_waits3 + nsends;
2746   }
2747 
2748   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2749   if (reuse == MAT_INITIAL_MATRIX){
2750     /* get new tags to keep the communication clean */
2751     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2752     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2753     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2754 
2755     /* post receives of other's nzlocal */
2756     for (i=0; i<nrecvs; i++){
2757       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2758     }
2759     /* send nzlocal to others */
2760     for (i=0; i<nsends; i++){
2761       sbuf_nz[i] = nzlocal;
2762       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2763     }
2764     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2765     count = nrecvs;
2766     while (count) {
2767       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2768       recv_rank[imdex] = recv_status.MPI_SOURCE;
2769       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2770       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2771 
2772       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2773       rbuf_nz[imdex] += i + 2;
2774       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2775       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2776       count--;
2777     }
2778     /* wait on sends of nzlocal */
2779     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2780     /* send mat->i,j to others, and recv from other's */
2781     /*------------------------------------------------*/
2782     for (i=0; i<nsends; i++){
2783       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2784       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2785     }
2786     /* wait on receives of mat->i,j */
2787     /*------------------------------*/
2788     count = nrecvs;
2789     while (count) {
2790       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2791       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2792       count--;
2793     }
2794     /* wait on sends of mat->i,j */
2795     /*---------------------------*/
2796     if (nsends) {
2797       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2798     }
2799   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2800 
2801   /* post receives, send and receive mat->a */
2802   /*----------------------------------------*/
2803   for (imdex=0; imdex<nrecvs; imdex++) {
2804     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2805   }
2806   for (i=0; i<nsends; i++){
2807     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2808   }
2809   count = nrecvs;
2810   while (count) {
2811     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2812     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2813     count--;
2814   }
2815   if (nsends) {
2816     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2817   }
2818 
2819   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2820 
2821   /* create redundant matrix */
2822   /*-------------------------*/
2823   if (reuse == MAT_INITIAL_MATRIX){
2824     /* compute rownz_max for preallocation */
2825     for (imdex=0; imdex<nrecvs; imdex++){
2826       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2827       rptr = rbuf_j[imdex];
2828       for (i=0; i<j; i++){
2829         ncols = rptr[i+1] - rptr[i];
2830         if (rownz_max < ncols) rownz_max = ncols;
2831       }
2832     }
2833 
2834     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2835     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2836     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2837     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2838     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2839     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2840   } else {
2841     C = *matredundant;
2842   }
2843 
2844   /* insert local matrix entries */
2845   rptr = sbuf_j;
2846   cols = sbuf_j + rend-rstart + 1;
2847   vals = sbuf_a;
2848   for (i=0; i<rend-rstart; i++){
2849     row   = i + rstart;
2850     ncols = rptr[i+1] - rptr[i];
2851     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2852     vals += ncols;
2853     cols += ncols;
2854   }
2855   /* insert received matrix entries */
2856   for (imdex=0; imdex<nrecvs; imdex++){
2857     rstart = rowrange[recv_rank[imdex]];
2858     rend   = rowrange[recv_rank[imdex]+1];
2859     rptr = rbuf_j[imdex];
2860     cols = rbuf_j[imdex] + rend-rstart + 1;
2861     vals = rbuf_a[imdex];
2862     for (i=0; i<rend-rstart; i++){
2863       row   = i + rstart;
2864       ncols = rptr[i+1] - rptr[i];
2865       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2866       vals += ncols;
2867       cols += ncols;
2868     }
2869   }
2870   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2871   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2872   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2873   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2874   if (reuse == MAT_INITIAL_MATRIX) {
2875     PetscContainer container;
2876     *matredundant = C;
2877     /* create a supporting struct and attach it to C for reuse */
2878     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2879     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2880     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2881     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2882     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2883     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2884 
2885     redund->nzlocal = nzlocal;
2886     redund->nsends  = nsends;
2887     redund->nrecvs  = nrecvs;
2888     redund->send_rank = send_rank;
2889     redund->recv_rank = recv_rank;
2890     redund->sbuf_nz = sbuf_nz;
2891     redund->rbuf_nz = rbuf_nz;
2892     redund->sbuf_j  = sbuf_j;
2893     redund->sbuf_a  = sbuf_a;
2894     redund->rbuf_j  = rbuf_j;
2895     redund->rbuf_a  = rbuf_a;
2896 
2897     redund->Destroy = C->ops->destroy;
2898     C->ops->destroy = MatDestroy_MatRedundant;
2899   }
2900   PetscFunctionReturn(0);
2901 }
2902 
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2905 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2906 {
2907   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2908   PetscErrorCode ierr;
2909   PetscInt       i,*idxb = 0;
2910   PetscScalar    *va,*vb;
2911   Vec            vtmp;
2912 
2913   PetscFunctionBegin;
2914   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2915   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2916   if (idx) {
2917     for (i=0; i<A->rmap->n; i++) {
2918       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2919     }
2920   }
2921 
2922   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2923   if (idx) {
2924     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2925   }
2926   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2927   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2928 
2929   for (i=0; i<A->rmap->n; i++){
2930     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2931       va[i] = vb[i];
2932       if (idx) idx[i] = a->garray[idxb[i]];
2933     }
2934   }
2935 
2936   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2937   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2938   ierr = PetscFree(idxb);CHKERRQ(ierr);
2939   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 #undef __FUNCT__
2944 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2945 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2946 {
2947   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2948   PetscErrorCode ierr;
2949   PetscInt       i,*idxb = 0;
2950   PetscScalar    *va,*vb;
2951   Vec            vtmp;
2952 
2953   PetscFunctionBegin;
2954   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2955   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2956   if (idx) {
2957     for (i=0; i<A->rmap->n; i++) {
2958       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2959     }
2960   }
2961 
2962   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2963   if (idx) {
2964     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2965   }
2966   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2967   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2968 
2969   for (i=0; i<A->rmap->n; i++){
2970     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2971       va[i] = vb[i];
2972       if (idx) idx[i] = a->garray[idxb[i]];
2973     }
2974   }
2975 
2976   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2977   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2978   ierr = PetscFree(idxb);CHKERRQ(ierr);
2979   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 #undef __FUNCT__
2984 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
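/*
   Computes the row-wise result on the diagonal block and the off-diagonal block separately,
   then keeps, for each row, whichever of the two entries has the smaller magnitude and maps
   its local column index back to a global index (cstart offset for the diagonal block,
   garray for the off-diagonal block).
*/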
2985 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2986 {
2987   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2988   PetscInt       n      = A->rmap->n;
2989   PetscInt       cstart = A->cmap->rstart;
2990   PetscInt      *cmap   = mat->garray;
2991   PetscInt      *diagIdx, *offdiagIdx;
2992   Vec            diagV, offdiagV;
2993   PetscScalar   *a, *diagA, *offdiagA;
2994   PetscInt       r;
2995   PetscErrorCode ierr;
2996 
2997   PetscFunctionBegin;
2998   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2999   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3000   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3001   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3002   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3003   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3004   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3005   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3006   for (r = 0; r < n; ++r) {
3007     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3008       a[r]   = diagA[r];
3009       idx[r] = cstart + diagIdx[r];
3010     } else {
3011       a[r]   = offdiagA[r];
3012       idx[r] = cmap[offdiagIdx[r]];
3013     }
3014   }
3015   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3016   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3017   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3018   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3019   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3020   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3021   PetscFunctionReturn(0);
3022 }
3023 
3024 #undef __FUNCT__
3025 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3026 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3027 {
3028   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3029   PetscInt       n      = A->rmap->n;
3030   PetscInt       cstart = A->cmap->rstart;
3031   PetscInt      *cmap   = mat->garray;
3032   PetscInt      *diagIdx, *offdiagIdx;
3033   Vec            diagV, offdiagV;
3034   PetscScalar   *a, *diagA, *offdiagA;
3035   PetscInt       r;
3036   PetscErrorCode ierr;
3037 
3038   PetscFunctionBegin;
3039   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3040   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3041   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3042   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3043   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3044   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3045   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3046   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3047   for (r = 0; r < n; ++r) {
3048     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3049       a[r]   = diagA[r];
3050       idx[r] = cstart + diagIdx[r];
3051     } else {
3052       a[r]   = offdiagA[r];
3053       idx[r] = cmap[offdiagIdx[r]];
3054     }
3055   }
3056   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3057   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3058   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3059   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3060   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3061   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
3064 
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3067 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3068 {
3069   PetscErrorCode ierr;
3070   Mat            *dummy;
3071 
3072   PetscFunctionBegin;
3073   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3074   *newmat = *dummy;
3075   ierr = PetscFree(dummy);CHKERRQ(ierr);
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3080 
3081 #undef __FUNCT__
3082 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3083 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3084 {
3085   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3086   PetscErrorCode ierr;
3087 
3088   PetscFunctionBegin;
3089   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 #undef __FUNCT__
3094 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3095 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3096 {
3097   PetscErrorCode ierr;
3098   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3099 
3100   PetscFunctionBegin;
3101   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3102   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3103   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3104   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   PetscFunctionReturn(0);
3106 }
3107 
3108 /* -------------------------------------------------------------------*/
3109 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3110        MatGetRow_MPIAIJ,
3111        MatRestoreRow_MPIAIJ,
3112        MatMult_MPIAIJ,
3113 /* 4*/ MatMultAdd_MPIAIJ,
3114        MatMultTranspose_MPIAIJ,
3115        MatMultTransposeAdd_MPIAIJ,
3116 #ifdef PETSC_HAVE_PBGL
3117        MatSolve_MPIAIJ,
3118 #else
3119        0,
3120 #endif
3121        0,
3122        0,
3123 /*10*/ 0,
3124        0,
3125        0,
3126        MatSOR_MPIAIJ,
3127        MatTranspose_MPIAIJ,
3128 /*15*/ MatGetInfo_MPIAIJ,
3129        MatEqual_MPIAIJ,
3130        MatGetDiagonal_MPIAIJ,
3131        MatDiagonalScale_MPIAIJ,
3132        MatNorm_MPIAIJ,
3133 /*20*/ MatAssemblyBegin_MPIAIJ,
3134        MatAssemblyEnd_MPIAIJ,
3135        MatSetOption_MPIAIJ,
3136        MatZeroEntries_MPIAIJ,
3137 /*24*/ MatZeroRows_MPIAIJ,
3138        0,
3139 #ifdef PETSC_HAVE_PBGL
3140        0,
3141 #else
3142        0,
3143 #endif
3144        0,
3145        0,
3146 /*29*/ MatSetUp_MPIAIJ,
3147 #ifdef PETSC_HAVE_PBGL
3148        0,
3149 #else
3150        0,
3151 #endif
3152        0,
3153        0,
3154        0,
3155 /*34*/ MatDuplicate_MPIAIJ,
3156        0,
3157        0,
3158        0,
3159        0,
3160 /*39*/ MatAXPY_MPIAIJ,
3161        MatGetSubMatrices_MPIAIJ,
3162        MatIncreaseOverlap_MPIAIJ,
3163        MatGetValues_MPIAIJ,
3164        MatCopy_MPIAIJ,
3165 /*44*/ MatGetRowMax_MPIAIJ,
3166        MatScale_MPIAIJ,
3167        0,
3168        0,
3169        MatZeroRowsColumns_MPIAIJ,
3170 /*49*/ MatSetRandom_MPIAIJ,
3171        0,
3172        0,
3173        0,
3174        0,
3175 /*54*/ MatFDColoringCreate_MPIAIJ,
3176        0,
3177        MatSetUnfactored_MPIAIJ,
3178        MatPermute_MPIAIJ,
3179        0,
3180 /*59*/ MatGetSubMatrix_MPIAIJ,
3181        MatDestroy_MPIAIJ,
3182        MatView_MPIAIJ,
3183        0,
3184        0,
3185 /*64*/ 0,
3186        0,
3187        0,
3188        0,
3189        0,
3190 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3191        MatGetRowMinAbs_MPIAIJ,
3192        0,
3193        MatSetColoring_MPIAIJ,
3194 #if defined(PETSC_HAVE_ADIC)
3195        MatSetValuesAdic_MPIAIJ,
3196 #else
3197        0,
3198 #endif
3199        MatSetValuesAdifor_MPIAIJ,
3200 /*75*/ MatFDColoringApply_AIJ,
3201        0,
3202        0,
3203        0,
3204        MatFindZeroDiagonals_MPIAIJ,
3205 /*80*/ 0,
3206        0,
3207        0,
3208 /*83*/ MatLoad_MPIAIJ,
3209        0,
3210        0,
3211        0,
3212        0,
3213        0,
3214 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3215        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3216        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3217        MatPtAP_Basic,
3218        MatPtAPSymbolic_MPIAIJ,
3219 /*94*/ MatPtAPNumeric_MPIAIJ,
3220        0,
3221        0,
3222        0,
3223        0,
3224 /*99*/ 0,
3225        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3226        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3227        MatConjugate_MPIAIJ,
3228        0,
3229 /*104*/MatSetValuesRow_MPIAIJ,
3230        MatRealPart_MPIAIJ,
3231        MatImaginaryPart_MPIAIJ,
3232        0,
3233        0,
3234 /*109*/0,
3235        MatGetRedundantMatrix_MPIAIJ,
3236        MatGetRowMin_MPIAIJ,
3237        0,
3238        0,
3239 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3240        0,
3241        0,
3242        0,
3243        0,
3244 /*119*/0,
3245        0,
3246        0,
3247        0,
3248        MatGetMultiProcBlock_MPIAIJ,
3249 /*124*/MatFindNonzeroRows_MPIAIJ,
3250        MatGetColumnNorms_MPIAIJ,
3251        MatInvertBlockDiagonal_MPIAIJ,
3252        0,
3253        MatGetSubMatricesParallel_MPIAIJ,
3254 /*129*/0,
3255        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3256        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3257        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3258        0,
3259 /*134*/0,
3260        0,
3261        0,
3262        0,
3263        0
3264 };
3265 
3266 /* ----------------------------------------------------------------------------------------*/
3267 
3268 EXTERN_C_BEGIN
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3271 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3272 {
3273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3274   PetscErrorCode ierr;
3275 
3276   PetscFunctionBegin;
3277   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3278   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3279   PetscFunctionReturn(0);
3280 }
3281 EXTERN_C_END
3282 
3283 EXTERN_C_BEGIN
3284 #undef __FUNCT__
3285 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3286 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3287 {
3288   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3289   PetscErrorCode ierr;
3290 
3291   PetscFunctionBegin;
3292   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3293   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3294   PetscFunctionReturn(0);
3295 }
3296 EXTERN_C_END
3297 
3298 EXTERN_C_BEGIN
3299 #undef __FUNCT__
3300 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3301 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3302 {
3303   Mat_MPIAIJ     *b;
3304   PetscErrorCode ierr;
3305   PetscInt       i;
3306   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3307 
3308   PetscFunctionBegin;
3309   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3310   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3311   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3312   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3313   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3314   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3315 
3316   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3317   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3318   if (d_nnz) {
3319     for (i=0; i<B->rmap->n; i++) {
3320       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3321     }
3322   }
3323   if (o_nnz) {
3324     for (i=0; i<B->rmap->n; i++) {
3325       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3326     }
3327   }
3328   b = (Mat_MPIAIJ*)B->data;
3329 
3330   if (!B->preallocated) {
3331     /* Explicitly create 2 MATSEQAIJ matrices. */
3332     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3333     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3334     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3335     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3336     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3337     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3338     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3339     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3340     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3341     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3342   }
3343 
3344   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3345   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3346   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3347   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3348   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3349   B->preallocated = PETSC_TRUE;
3350   PetscFunctionReturn(0);
3351 }
3352 EXTERN_C_END
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3356 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3357 {
3358   Mat            mat;
3359   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3360   PetscErrorCode ierr;
3361 
3362   PetscFunctionBegin;
3363   *newmat       = 0;
3364   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3365   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3366   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3367   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3368   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3369   a    = (Mat_MPIAIJ*)mat->data;
3370 
3371   mat->factortype    = matin->factortype;
3372   mat->rmap->bs      = matin->rmap->bs;
3373   mat->cmap->bs      = matin->cmap->bs;
3374   mat->assembled    = PETSC_TRUE;
3375   mat->insertmode   = NOT_SET_VALUES;
3376   mat->preallocated = PETSC_TRUE;
3377 
3378   a->size           = oldmat->size;
3379   a->rank           = oldmat->rank;
3380   a->donotstash     = oldmat->donotstash;
3381   a->roworiented    = oldmat->roworiented;
3382   a->rowindices     = 0;
3383   a->rowvalues      = 0;
3384   a->getrowactive   = PETSC_FALSE;
3385 
3386   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3387   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3388 
3389   if (oldmat->colmap) {
3390 #if defined (PETSC_USE_CTABLE)
3391     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3392 #else
3393     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3394     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3395     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3396 #endif
3397   } else a->colmap = 0;
3398   if (oldmat->garray) {
3399     PetscInt len;
3400     len  = oldmat->B->cmap->n;
3401     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3402     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3403     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3404   } else a->garray = 0;
3405 
3406   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3407   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3408   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3409   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3410   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3411   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3412   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3413   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3414   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3415   *newmat = mat;
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 
3420 
3421 #undef __FUNCT__
3422 #define __FUNCT__ "MatLoad_MPIAIJ"
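/*
   Process 0 reads the binary file: the header, then the row lengths, column indices and values
   for every process in turn, shipping each process its share with MPIULong_Send(). Each process
   splits its row lengths into diagonal/off-diagonal counts for preallocation and inserts its
   rows with MatSetValues_MPIAIJ().
*/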
3423 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3424 {
3425   PetscScalar    *vals,*svals;
3426   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3427   PetscErrorCode ierr;
3428   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3429   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3430   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3431   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3432   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3433   int            fd;
3434   PetscInt       bs = 1;
3435 
3436   PetscFunctionBegin;
3437   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3438   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3439   if (!rank) {
3440     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3441     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3442     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3443   }
3444 
3445   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3446   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3447   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3448 
3449   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3450 
3451   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3452   M = header[1]; N = header[2];
3453   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3454   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3455   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3456 
3457   /* If global sizes are set, check if they are consistent with that given in the file */
3458   if (sizesset) {
3459     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3460   }
3461   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3462   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3463 
3464   /* determine ownership of all (block) rows */
3465   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3466   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3467   else m = newMat->rmap->n; /* Set by user */
3468 
3469   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3470   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3471 
3472   /* First process needs enough room for process with most rows */
3473   if (!rank) {
3474     mmax = rowners[1];
3475     for (i=2; i<=size; i++) {
3476       mmax = PetscMax(mmax, rowners[i]);
3477     }
3478   } else mmax = m;
3479 
3480   rowners[0] = 0;
3481   for (i=2; i<=size; i++) {
3482     rowners[i] += rowners[i-1];
3483   }
3484   rstart = rowners[rank];
3485   rend   = rowners[rank+1];
3486 
3487   /* distribute row lengths to all processors */
3488   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3489   if (!rank) {
3490     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3491     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3492     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3493     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3494     for (j=0; j<m; j++) {
3495       procsnz[0] += ourlens[j];
3496     }
3497     for (i=1; i<size; i++) {
3498       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3499       /* calculate the number of nonzeros on each processor */
3500       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3501         procsnz[i] += rowlengths[j];
3502       }
3503       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3504     }
3505     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3506   } else {
3507     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3508   }
3509 
3510   if (!rank) {
3511     /* determine max buffer needed and allocate it */
3512     maxnz = 0;
3513     for (i=0; i<size; i++) {
3514       maxnz = PetscMax(maxnz,procsnz[i]);
3515     }
3516     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3517 
3518     /* read in my part of the matrix column indices  */
3519     nz   = procsnz[0];
3520     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3521     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3522 
3523     /* read in everyone else's and ship off */
3524     for (i=1; i<size; i++) {
3525       nz     = procsnz[i];
3526       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3527       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3528     }
3529     ierr = PetscFree(cols);CHKERRQ(ierr);
3530   } else {
3531     /* determine buffer space needed for message */
3532     nz = 0;
3533     for (i=0; i<m; i++) {
3534       nz += ourlens[i];
3535     }
3536     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3537 
3538     /* receive message of column indices*/
3539     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3540   }
3541 
3542   /* determine column ownership if matrix is not square */
3543   if (N != M) {
3544     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3545     else n = newMat->cmap->n;
3546     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3547     cstart = cend - n;
3548   } else {
3549     cstart = rstart;
3550     cend   = rend;
3551     n      = cend - cstart;
3552   }
3553 
3554   /* loop over local rows, determining number of off diagonal entries */
3555   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3556   jj = 0;
3557   for (i=0; i<m; i++) {
3558     for (j=0; j<ourlens[i]; j++) {
3559       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3560       jj++;
3561     }
3562   }
3563 
3564   for (i=0; i<m; i++) {
3565     ourlens[i] -= offlens[i];
3566   }
3567   if (!sizesset) {
3568     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3569   }
3570 
3571   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3572 
3573   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3574 
3575   for (i=0; i<m; i++) {
3576     ourlens[i] += offlens[i];
3577   }
3578 
3579   if (!rank) {
3580     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3581 
3582     /* read in my part of the matrix numerical values  */
3583     nz   = procsnz[0];
3584     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3585 
3586     /* insert into matrix */
3587     jj      = rstart;
3588     smycols = mycols;
3589     svals   = vals;
3590     for (i=0; i<m; i++) {
3591       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3592       smycols += ourlens[i];
3593       svals   += ourlens[i];
3594       jj++;
3595     }
3596 
3597     /* read in other processors and ship out */
3598     for (i=1; i<size; i++) {
3599       nz     = procsnz[i];
3600       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3601       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3602     }
3603     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3604   } else {
3605     /* receive numeric values */
3606     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3607 
3608     /* receive message of values*/
3609     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3610 
3611     /* insert into matrix */
3612     jj      = rstart;
3613     smycols = mycols;
3614     svals   = vals;
3615     for (i=0; i<m; i++) {
3616       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3617       smycols += ourlens[i];
3618       svals   += ourlens[i];
3619       jj++;
3620     }
3621   }
3622   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3623   ierr = PetscFree(vals);CHKERRQ(ierr);
3624   ierr = PetscFree(mycols);CHKERRQ(ierr);
3625   ierr = PetscFree(rowners);CHKERRQ(ierr);
3626   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3627   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3628 
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 #undef __FUNCT__
3633 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
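/*
   Gathers the (parallel) column index set onto every process with ISAllGather() and passes the
   result to MatGetSubMatrix_MPIAIJ_Private(); the gathered IS is composed with the new matrix
   under the name "ISAllGather" so that a later MAT_REUSE_MATRIX call can retrieve it.
*/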
3634 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3635 {
3636   PetscErrorCode ierr;
3637   IS             iscol_local;
3638   PetscInt       csize;
3639 
3640   PetscFunctionBegin;
3641   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3642   if (call == MAT_REUSE_MATRIX) {
3643     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3644     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3645   } else {
3646     PetscInt cbs;
3647     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3648     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3649     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3650   }
3651   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3652   if (call == MAT_INITIAL_MATRIX) {
3653     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3654     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3655   }
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3660 #undef __FUNCT__
3661 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3662 /*
3663     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on
3664   each process, and then the end result obtained by concatenating the local matrices.
3665   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3666 
3667   Note: This requires a sequential iscol containing all of the requested column indices.
3668 */
3669 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3670 {
3671   PetscErrorCode ierr;
3672   PetscMPIInt    rank,size;
3673   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3674   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3675   PetscBool      allcolumns, colflag;
3676   Mat            M,Mreuse;
3677   MatScalar      *vwork,*aa;
3678   MPI_Comm       comm = ((PetscObject)mat)->comm;
3679   Mat_SeqAIJ     *aij;
3680 
3681 
3682   PetscFunctionBegin;
3683   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3684   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3685 
3686   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3687   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3688   if (colflag && ncol == mat->cmap->N){
3689     allcolumns = PETSC_TRUE;
3690   } else {
3691     allcolumns = PETSC_FALSE;
3692   }
3693   if (call ==  MAT_REUSE_MATRIX) {
3694     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3695     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3696     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3697   } else {
3698     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3699   }
3700 
3701   /*
3702       m - number of local rows
3703       n - number of columns (same on all processors)
3704       rstart - first row in new global matrix generated
3705   */
3706   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3707   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3708   if (call == MAT_INITIAL_MATRIX) {
3709     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3710     ii  = aij->i;
3711     jj  = aij->j;
3712 
3713     /*
3714         Determine the number of non-zeros in the diagonal and off-diagonal
3715         portions of the matrix in order to do correct preallocation
3716     */
3717 
3718     /* first get start and end of "diagonal" columns */
3719     if (csize == PETSC_DECIDE) {
3720       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3721       if (mglobal == n) { /* square matrix */
3722 	nlocal = m;
3723       } else {
3724         nlocal = n/size + ((n % size) > rank);
3725       }
3726     } else {
3727       nlocal = csize;
3728     }
3729     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3730     rstart = rend - nlocal;
3731     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3732 
3733     /* next, compute all the lengths */
3734     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3735     olens = dlens + m;
3736     for (i=0; i<m; i++) {
3737       jend = ii[i+1] - ii[i];
3738       olen = 0;
3739       dlen = 0;
3740       for (j=0; j<jend; j++) {
3741         if (*jj < rstart || *jj >= rend) olen++;
3742         else dlen++;
3743         jj++;
3744       }
3745       olens[i] = olen;
3746       dlens[i] = dlen;
3747     }
3748     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3749     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3750     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3751     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3752     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3753     ierr = PetscFree(dlens);CHKERRQ(ierr);
3754   } else {
3755     PetscInt ml,nl;
3756 
3757     M = *newmat;
3758     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3759     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3760     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3761     /*
3762          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3763        rather than the slower MatSetValues().
3764     */
3765     M->was_assembled = PETSC_TRUE;
3766     M->assembled     = PETSC_FALSE;
3767   }
3768   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3769   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3770   ii  = aij->i;
3771   jj  = aij->j;
3772   aa  = aij->a;
3773   for (i=0; i<m; i++) {
3774     row   = rstart + i;
3775     nz    = ii[i+1] - ii[i];
3776     cwork = jj;     jj += nz;
3777     vwork = aa;     aa += nz;
3778     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3779   }
3780 
3781   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3782   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3783   *newmat = M;
3784 
3785   /* save submatrix used in processor for next request */
3786   if (call ==  MAT_INITIAL_MATRIX) {
3787     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3788     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3789   }
3790 
3791   PetscFunctionReturn(0);
3792 }
3793 
3794 EXTERN_C_BEGIN
3795 #undef __FUNCT__
3796 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
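/*
   Two passes over the CSR input: first count, for each local row, how many column indices fall
   inside [cstart,cend) (diagonal block) versus outside it (off-diagonal block) to preallocate,
   then insert the rows with MatSetValues_MPIAIJ(). If no values are supplied a zero-filled
   scratch array is used so the nonzero pattern still gets set.
*/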
3797 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3798 {
3799   PetscInt       m,cstart, cend,j,nnz,i,d;
3800   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3801   const PetscInt *JJ;
3802   PetscScalar    *values;
3803   PetscErrorCode ierr;
3804 
3805   PetscFunctionBegin;
3806   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3807 
3808   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3809   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3810   m      = B->rmap->n;
3811   cstart = B->cmap->rstart;
3812   cend   = B->cmap->rend;
3813   rstart = B->rmap->rstart;
3814 
3815   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3816 
3817 #if defined(PETSC_USE_DEBUG)
3818   for (i=0; i<m; i++) {
3819     nnz     = Ii[i+1]- Ii[i];
3820     JJ      = J + Ii[i];
3821     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3822     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3823     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3824   }
3825 #endif
3826 
3827   for (i=0; i<m; i++) {
3828     nnz     = Ii[i+1]- Ii[i];
3829     JJ      = J + Ii[i];
3830     nnz_max = PetscMax(nnz_max,nnz);
3831     d       = 0;
3832     for (j=0; j<nnz; j++) {
3833       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3834     }
3835     d_nnz[i] = d;
3836     o_nnz[i] = nnz - d;
3837   }
3838   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3839   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3840 
3841   if (v) values = (PetscScalar*)v;
3842   else {
3843     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3844     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3845   }
3846 
3847   for (i=0; i<m; i++) {
3848     ii   = i + rstart;
3849     nnz  = Ii[i+1]- Ii[i];
3850     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3851   }
3852   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3853   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3854 
3855   if (!v) {
3856     ierr = PetscFree(values);CHKERRQ(ierr);
3857   }
3858   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3859   PetscFunctionReturn(0);
3860 }
3861 EXTERN_C_END
3862 
3863 #undef __FUNCT__
3864 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3865 /*@
3866    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3867    (the default parallel PETSc format).
3868 
3869    Collective on MPI_Comm
3870 
3871    Input Parameters:
3872 +  B - the matrix
3873 .  i - the indices into j for the start of each local row (starts with zero)
3874 .  j - the column indices for each local row (starts with zero)
3875 -  v - optional values in the matrix
3876 
3877    Level: developer
3878 
3879    Notes:
3880        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3881      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3882      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3883 
3884        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3885 
3886        The format used for the sparse matrix input is equivalent to a
3887     row-major ordering, i.e. for the following matrix, the input data expected is
3888     as shown:
3889 
3890         1 0 0
3891         2 0 3     P0
3892        -------
3893         4 5 6     P1
3894 
3895      Process0 [P0]: rows_owned=[0,1]
3896         i =  {0,1,3}  [size = nrow+1  = 2+1]
3897         j =  {0,0,2}  [size = nz = 3]
3898         v =  {1,2,3}  [size = nz = 3]
3899 
3900      Process1 [P1]: rows_owned=[2]
3901         i =  {0,3}    [size = nrow+1  = 1+1]
3902         j =  {0,1,2}  [size = nz = 3]
3903         v =  {4,5,6}  [size = nz = 3]
3904 
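     A minimal calling sketch for P0 in the example above (a sketch only, with error
     checking omitted; the i, j, v arrays on P1 differ as listed above):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};          /* P0 row pointers      */
     PetscInt    j[] = {0,0,2};          /* P0 column indices    */
     PetscScalar v[] = {1.0,2.0,3.0};    /* P0 values            */

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);  /* P0 owns rows 0 and 1 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
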
3905 .keywords: matrix, aij, compressed row, sparse, parallel
3906 
3907 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3908           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3909 @*/
3910 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3911 {
3912   PetscErrorCode ierr;
3913 
3914   PetscFunctionBegin;
3915   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
3918 
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3921 /*@C
3922    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3923    (the default parallel PETSc format).  For good matrix assembly performance
3924    the user should preallocate the matrix storage by setting the parameters
3925    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3926    performance can be increased by more than a factor of 50.
3927 
3928    Collective on MPI_Comm
3929 
3930    Input Parameters:
3931 +  A - the matrix
3932 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3933            (same value is used for all local rows)
3934 .  d_nnz - array containing the number of nonzeros in the various rows of the
3935            DIAGONAL portion of the local submatrix (possibly different for each row)
3936            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3937            The size of this array is equal to the number of local rows, i.e 'm'.
3938            For matrices that will be factored, you must leave room for (and set)
3939            the diagonal entry even if it is zero.
3940 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3941            submatrix (same value is used for all local rows).
3942 -  o_nnz - array containing the number of nonzeros in the various rows of the
3943            OFF-DIAGONAL portion of the local submatrix (possibly different for
3944            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3945            structure. The size of this array is equal to the number
3946            of local rows, i.e 'm'.
3947 
3948    If the *_nnz parameter is given then the *_nz parameter is ignored
3949 
3950    The AIJ format (also called the Yale sparse matrix format or
3951    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3952    storage.  The stored row and column indices begin with zero.
3953    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3954 
3955    The parallel matrix is partitioned such that the first m0 rows belong to
3956    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3957    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
3958 
3959    The DIAGONAL portion of the local submatrix of a processor can be defined
3960    as the submatrix which is obtained by extracting the part corresponding to
3961    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3962    first row that belongs to the processor, r2 is the last row belonging to
3963    this processor, and c1-c2 is the range of indices of the local part of a
3964    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3965    common case of a square matrix, the row and column ranges are the same and
3966    the DIAGONAL part is also square. The remaining portion of the local
3967    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3968 
3969    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3970 
3971    You can call MatGetInfo() to get information on how effective the preallocation was;
3972    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3973    You can also run with the option -info and look for messages containing the string
3974    malloc to see if additional memory allocation was needed.
3975 
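   A minimal sketch of querying those statistics after assembly (error checking
   omitted; this is illustrative, not the only way to inspect the preallocation):

.vb
     MatInfo info;
     MatGetInfo(A,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve
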
3976    Example usage:
3977 
3978    Consider the following 8x8 matrix with 34 non-zero values, that is
3979    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3980    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3981    as follows:
3982 
3983 .vb
3984             1  2  0  |  0  3  0  |  0  4
3985     Proc0   0  5  6  |  7  0  0  |  8  0
3986             9  0 10  | 11  0  0  | 12  0
3987     -------------------------------------
3988            13  0 14  | 15 16 17  |  0  0
3989     Proc1   0 18  0  | 19 20 21  |  0  0
3990             0  0  0  | 22 23  0  | 24  0
3991     -------------------------------------
3992     Proc2  25 26 27  |  0  0 28  | 29  0
3993            30  0  0  | 31 32 33  |  0 34
3994 .ve
3995 
3996    This can be represented as a collection of submatrices as:
3997 
3998 .vb
3999       A B C
4000       D E F
4001       G H I
4002 .ve
4003 
4004    where the submatrices A,B,C are owned by proc0, D,E,F are
4005    owned by proc1, G,H,I are owned by proc2.
4006 
4007    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4008    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4009    The 'M','N' parameters are 8,8, and have the same values on all procs.
4010 
4011    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4012    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4013    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4014    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4015    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4016    matrix, and [DF] as another SeqAIJ matrix.
4017 
4018    When d_nz, o_nz parameters are specified, d_nz storage elements are
4019    allocated for every row of the local diagonal submatrix, and o_nz
4020    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4021    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4022    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4023    In this case, the values of d_nz,o_nz are:
4024 .vb
4025      proc0 : d_nz = 2, o_nz = 2
4026      proc1 : d_nz = 3, o_nz = 2
4027      proc2 : d_nz = 1, o_nz = 4
4028 .ve
4029    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4030    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4031    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4032    34 values.
4033 
4034    When d_nnz, o_nnz parameters are specified, the storage is specified
4035    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4036    In the above case the values for d_nnz,o_nnz are:
4037 .vb
4038      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4039      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4040      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4041 .ve
4042    Here the space allocated is the sum of all the above values, i.e., 34, and
4043    hence the preallocation is perfect.
4044 
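   A minimal sketch of the corresponding calls on proc1 in the example above
   (error checking omitted; the variable names are illustrative):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};  /* proc1's rows */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
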
4045    Level: intermediate
4046 
4047 .keywords: matrix, aij, compressed row, sparse, parallel
4048 
4049 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4050           MPIAIJ, MatGetInfo()
4051 @*/
4052 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4053 {
4054   PetscErrorCode ierr;
4055 
4056   PetscFunctionBegin;
4057   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4058   PetscValidType(B,1);
4059   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4060   PetscFunctionReturn(0);
4061 }
4062 
4063 #undef __FUNCT__
4064 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4065 /*@
4066      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4067          CSR format the local rows.
4068 
4069    Collective on MPI_Comm
4070 
4071    Input Parameters:
4072 +  comm - MPI communicator
4073 .  m - number of local rows (Cannot be PETSC_DECIDE)
4074 .  n - This value should be the same as the local size used in creating the
4075        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4076        calculated if N is given). For square matrices n is almost always m.
4077 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4078 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4079 .   i - row indices
4080 .   j - column indices
4081 -   a - matrix values
4082 
4083    Output Parameter:
4084 .   mat - the matrix
4085 
4086    Level: intermediate
4087 
4088    Notes:
4089        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4090      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4091      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4092 
4093        The i and j indices are 0 based; the i array gives the offsets of each local row into the local j (and a) arrays.
4094 
4095        The format used for the sparse matrix input is equivalent to a
4096     row-major ordering, i.e., for the following matrix the expected input data is
4097     as shown:
4098 
4099         1 0 0
4100         2 0 3     P0
4101        -------
4102         4 5 6     P1
4103 
4104      Process0 [P0]: rows_owned=[0,1]
4105         i =  {0,1,3}  [size = nrow+1  = 2+1]
4106         j =  {0,0,2}  [size = nz = 3]
4107         v =  {1,2,3}  [size = nz = 3]
4108 
4109      Process1 [P1]: rows_owned=[2]
4110         i =  {0,3}    [size = nrow+1  = 1+1]
4111         j =  {0,1,2}  [size = nz = 3]
4112         v =  {4,5,6}  [size = nz = 3]
4113 
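   A minimal sketch of the call made on process 0 with the arrays above (error
   checking omitted):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
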
4114 .keywords: matrix, aij, compressed row, sparse, parallel
4115 
4116 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4117           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4118 @*/
4119 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4120 {
4121   PetscErrorCode ierr;
4122 
4123   PetscFunctionBegin;
4124   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4125   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4126   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4127   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4128   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4129   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4130   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4131   PetscFunctionReturn(0);
4132 }
4133 
4134 #undef __FUNCT__
4135 #define __FUNCT__ "MatCreateAIJ"
4136 /*@C
4137    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4138    (the default parallel PETSc format).  For good matrix assembly performance
4139    the user should preallocate the matrix storage by setting the parameters
4140    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4141    performance can be increased by more than a factor of 50.
4142 
4143    Collective on MPI_Comm
4144 
4145    Input Parameters:
4146 +  comm - MPI communicator
4147 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4148            This value should be the same as the local size used in creating the
4149            y vector for the matrix-vector product y = Ax.
4150 .  n - This value should be the same as the local size used in creating the
4151        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4152        calculated if N is given). For square matrices n is almost always m.
4153 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4154 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4155 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4156            (same value is used for all local rows)
4157 .  d_nnz - array containing the number of nonzeros in the various rows of the
4158            DIAGONAL portion of the local submatrix (possibly different for each row)
4159            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4160            The size of this array is equal to the number of local rows, i.e 'm'.
4161 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4162            submatrix (same value is used for all local rows).
4163 -  o_nnz - array containing the number of nonzeros in the various rows of the
4164            OFF-DIAGONAL portion of the local submatrix (possibly different for
4165            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4166            structure. The size of this array is equal to the number
4167            of local rows, i.e 'm'.
4168 
4169    Output Parameter:
4170 .  A - the matrix
4171 
4172    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4173    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4174    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4175 
4176    Notes:
4177    If the *_nnz parameter is given then the *_nz parameter is ignored
4178 
4179    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4180    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4181    storage requirements for this matrix.
4182 
4183    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4184    processor then it must be used on all processors that share the object for
4185    that argument.
4186 
4187    The user MUST specify either the local or global matrix dimensions
4188    (possibly both).
4189 
4190    The parallel matrix is partitioned across processors such that the
4191    first m0 rows belong to process 0, the next m1 rows belong to
4192    process 1, the next m2 rows belong to process 2, etc., where
4193    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4194    values corresponding to an [m x N] submatrix.
4195 
4196    The columns are logically partitioned with the n0 columns belonging
4197    to the 0th partition, the next n1 columns belonging to the next
4198    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4199 
4200    The DIAGONAL portion of the local submatrix on any given processor
4201    is the submatrix corresponding to the rows and columns m,n
4202    corresponding to the given processor, i.e., the diagonal matrix on
4203    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4204    etc. The remaining portion of the local submatrix [m x (N-n)]
4205    constitutes the OFF-DIAGONAL portion. The example below better
4206    illustrates this concept.
4207 
4208    For a square global matrix we define each processor's diagonal portion
4209    to be its local rows and the corresponding columns (a square submatrix);
4210    each processor's off-diagonal portion encompasses the remainder of the
4211    local matrix (a rectangular submatrix).
4212 
4213    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4214 
4215    When calling this routine with a single process communicator, a matrix of
4216    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4217    type of communicator, use the construction mechanism:
4218      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4219 
4220    By default, this format uses inodes (identical nodes) when possible.
4221    We search for consecutive rows with the same nonzero structure, thereby
4222    reusing matrix information to achieve increased efficiency.
4223 
4224    Options Database Keys:
4225 +  -mat_no_inode  - Do not use inodes
4226 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4227 -  -mat_aij_oneindex - Internally use indexing starting at 1
4228         rather than 0.  Note that when calling MatSetValues(),
4229         the user still MUST index entries starting at 0!
4230 
4231 
4232    Example usage:
4233 
4234    Consider the following 8x8 matrix with 34 non-zero values, that is
4235    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4236    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4237    as follows:
4238 
4239 .vb
4240             1  2  0  |  0  3  0  |  0  4
4241     Proc0   0  5  6  |  7  0  0  |  8  0
4242             9  0 10  | 11  0  0  | 12  0
4243     -------------------------------------
4244            13  0 14  | 15 16 17  |  0  0
4245     Proc1   0 18  0  | 19 20 21  |  0  0
4246             0  0  0  | 22 23  0  | 24  0
4247     -------------------------------------
4248     Proc2  25 26 27  |  0  0 28  | 29  0
4249            30  0  0  | 31 32 33  |  0 34
4250 .ve
4251 
4252    This can be represented as a collection of submatrices as:
4253 
4254 .vb
4255       A B C
4256       D E F
4257       G H I
4258 .ve
4259 
4260    where the submatrices A,B,C are owned by proc0, D,E,F are
4261    owned by proc1, G,H,I are owned by proc2.
4262 
4263    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4264    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4265    The 'M','N' parameters are 8,8, and have the same values on all procs.
4266 
4267    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4268    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4269    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4270    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4271    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4272    matrix, and [DF] as another SeqAIJ matrix.
4273 
4274    When d_nz, o_nz parameters are specified, d_nz storage elements are
4275    allocated for every row of the local diagonal submatrix, and o_nz
4276    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4277    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4278    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4279    In this case, the values of d_nz,o_nz are:
4280 .vb
4281      proc0 : d_nz = 2, o_nz = 2
4282      proc1 : d_nz = 3, o_nz = 2
4283      proc2 : d_nz = 1, o_nz = 4
4284 .ve
4285    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4286    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4287    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4288    34 values.
4289 
4290    When d_nnz, o_nnz parameters are specified, the storage is specified
4291    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4292    In the above case the values for d_nnz,o_nnz are:
4293 .vb
4294      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4295      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4296      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4297 .ve
4298    Here the space allocated is the sum of all the above values, i.e., 34, and
4299    hence the preallocation is perfect.
4300 
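   A minimal sketch of the corresponding call on proc1 in the example above
   (error checking omitted; the array names are illustrative):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};  /* proc1's rows */

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
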
4301    Level: intermediate
4302 
4303 .keywords: matrix, aij, compressed row, sparse, parallel
4304 
4305 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4306           MPIAIJ, MatCreateMPIAIJWithArrays()
4307 @*/
4308 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4309 {
4310   PetscErrorCode ierr;
4311   PetscMPIInt    size;
4312 
4313   PetscFunctionBegin;
4314   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4315   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4316   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4317   if (size > 1) {
4318     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4319     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4320   } else {
4321     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4322     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4323   }
4324   PetscFunctionReturn(0);
4325 }
4326 
4327 #undef __FUNCT__
4328 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4329 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4330 {
4331   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4332 
4333   PetscFunctionBegin;
4334   *Ad     = a->A;
4335   *Ao     = a->B;
4336   *colmap = a->garray;
4337   PetscFunctionReturn(0);
4338 }
4339 
4340 #undef __FUNCT__
4341 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4342 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4343 {
4344   PetscErrorCode ierr;
4345   PetscInt       i;
4346   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4347 
4348   PetscFunctionBegin;
4349   if (coloring->ctype == IS_COLORING_GLOBAL) {
4350     ISColoringValue *allcolors,*colors;
4351     ISColoring      ocoloring;
4352 
4353     /* set coloring for diagonal portion */
4354     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4355 
4356     /* set coloring for off-diagonal portion */
4357     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4358     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4359     for (i=0; i<a->B->cmap->n; i++) {
4360       colors[i] = allcolors[a->garray[i]];
4361     }
4362     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4363     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4364     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4365     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4366   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4367     ISColoringValue *colors;
4368     PetscInt        *larray;
4369     ISColoring      ocoloring;
4370 
4371     /* set coloring for diagonal portion */
4372     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4373     for (i=0; i<a->A->cmap->n; i++) {
4374       larray[i] = i + A->cmap->rstart;
4375     }
4376     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4377     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4378     for (i=0; i<a->A->cmap->n; i++) {
4379       colors[i] = coloring->colors[larray[i]];
4380     }
4381     ierr = PetscFree(larray);CHKERRQ(ierr);
4382     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4383     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4384     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4385 
4386     /* set coloring for off-diagonal portion */
4387     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4388     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4389     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4390     for (i=0; i<a->B->cmap->n; i++) {
4391       colors[i] = coloring->colors[larray[i]];
4392     }
4393     ierr = PetscFree(larray);CHKERRQ(ierr);
4394     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4395     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4396     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4397   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4398 
4399   PetscFunctionReturn(0);
4400 }
4401 
4402 #if defined(PETSC_HAVE_ADIC)
4403 #undef __FUNCT__
4404 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4405 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4406 {
4407   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4408   PetscErrorCode ierr;
4409 
4410   PetscFunctionBegin;
4411   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4412   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 #endif
4416 
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4419 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4420 {
4421   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4422   PetscErrorCode ierr;
4423 
4424   PetscFunctionBegin;
4425   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4426   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4432 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4433 {
4434   PetscErrorCode ierr;
4435   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4436   PetscInt       *indx;
4437 
4438   PetscFunctionBegin;
4439   /* This routine will ONLY return MPIAIJ type matrix */
4440   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4441   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4442   if (n == PETSC_DECIDE){
4443     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4444   }
4445   /* Check sum(n) = N */
4446   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4447   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4448 
4449   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4450   rstart -= m;
4451 
4452   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4453   for (i=0;i<m;i++) {
4454     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4455     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4456     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4457   }
4458 
4459   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4460   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4461   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4462   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4463   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4464   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4465   PetscFunctionReturn(0);
4466 }
4467 
4468 #undef __FUNCT__
4469 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4470 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4471 {
4472   PetscErrorCode ierr;
4473   PetscInt       m,N,i,rstart,nnz,Ii;
4474   PetscInt       *indx;
4475   PetscScalar    *values;
4476 
4477   PetscFunctionBegin;
4478   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4479   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4480   for (i=0;i<m;i++) {
4481     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4482     Ii    = i + rstart;
4483     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4484     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4485   }
4486   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4487   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 #undef __FUNCT__
4492 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4493 /*@
4494       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4495                  matrices from each processor
4496 
4497     Collective on MPI_Comm
4498 
4499    Input Parameters:
4500 +    comm - the communicator the parallel matrix will live on
4501 .    inmat - the input sequential matrix (one per process)
4502 .    n - number of local columns (or PETSC_DECIDE)
4503 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4504 
4505    Output Parameter:
4506 .    outmat - the parallel matrix generated
4507 
4508     Level: advanced
4509 
4510    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4511 
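   A minimal usage sketch, assuming each process already holds a SeqAIJ matrix Aseq
   with the same number of columns (error checking omitted):

.vb
     Mat Apar;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);
.ve
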
4512 @*/
4513 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4514 {
4515   PetscErrorCode ierr;
4516 
4517   PetscFunctionBegin;
4518   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4519   if (scall == MAT_INITIAL_MATRIX){
4520     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4521   }
4522   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4523   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4524   PetscFunctionReturn(0);
4525 }
4526 
4527 #undef __FUNCT__
4528 #define __FUNCT__ "MatFileSplit"
4529 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4530 {
4531   PetscErrorCode    ierr;
4532   PetscMPIInt       rank;
4533   PetscInt          m,N,i,rstart,nnz;
4534   size_t            len;
4535   const PetscInt    *indx;
4536   PetscViewer       out;
4537   char              *name;
4538   Mat               B;
4539   const PetscScalar *values;
4540 
4541   PetscFunctionBegin;
4542   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4543   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4544   /* Should this be the type of the diagonal block of A? */
4545   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4546   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4547   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4548   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4549   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4550   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4551   for (i=0;i<m;i++) {
4552     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4553     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4554     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4555   }
4556   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4557   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4558 
4559   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4560   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4561   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4562   sprintf(name,"%s.%d",outfile,rank);
4563   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4564   ierr = PetscFree(name);CHKERRQ(ierr);
4565   ierr = MatView(B,out);CHKERRQ(ierr);
4566   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4567   ierr = MatDestroy(&B);CHKERRQ(ierr);
4568   PetscFunctionReturn(0);
4569 }
4570 
4571 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4572 #undef __FUNCT__
4573 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4574 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4575 {
4576   PetscErrorCode       ierr;
4577   Mat_Merge_SeqsToMPI  *merge;
4578   PetscContainer       container;
4579 
4580   PetscFunctionBegin;
4581   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4582   if (container) {
4583     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4584     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4585     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4586     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4587     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4588     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4589     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4590     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4591     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4592     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4593     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4594     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4595     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4596     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4597     ierr = PetscFree(merge);CHKERRQ(ierr);
4598     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4599   }
4600   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4601   PetscFunctionReturn(0);
4602 }
4603 
4604 #include <../src/mat/utils/freespace.h>
4605 #include <petscbt.h>
4606 
4607 #undef __FUNCT__
4608 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4609 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4610 {
4611   PetscErrorCode       ierr;
4612   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4613   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4614   PetscMPIInt          size,rank,taga,*len_s;
4615   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4616   PetscInt             proc,m;
4617   PetscInt             **buf_ri,**buf_rj;
4618   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4619   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4620   MPI_Request          *s_waits,*r_waits;
4621   MPI_Status           *status;
4622   MatScalar            *aa=a->a;
4623   MatScalar            **abuf_r,*ba_i;
4624   Mat_Merge_SeqsToMPI  *merge;
4625   PetscContainer       container;
4626 
4627   PetscFunctionBegin;
4628   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4629 
4630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4631   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4632 
4633   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4634   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4635 
4636   bi     = merge->bi;
4637   bj     = merge->bj;
4638   buf_ri = merge->buf_ri;
4639   buf_rj = merge->buf_rj;
4640 
4641   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4642   owners = merge->rowmap->range;
4643   len_s  = merge->len_s;
4644 
4645   /* send and recv matrix values */
4646   /*-----------------------------*/
4647   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4648   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4649 
4650   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4651   for (proc=0,k=0; proc<size; proc++){
4652     if (!len_s[proc]) continue;
4653     i = owners[proc];
4654     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4655     k++;
4656   }
4657 
4658   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4659   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4660   ierr = PetscFree(status);CHKERRQ(ierr);
4661 
4662   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4663   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4664 
4665   /* insert mat values of mpimat */
4666   /*----------------------------*/
4667   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4668   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4669 
4670   for (k=0; k<merge->nrecv; k++){
4671     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4672     nrows = *(buf_ri_k[k]);
4673     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4674     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4675   }
4676 
4677   /* set values of ba */
4678   m = merge->rowmap->n;
4679   for (i=0; i<m; i++) {
4680     arow = owners[rank] + i;
4681     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4682     bnzi = bi[i+1] - bi[i];
4683     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4684 
4685     /* add local non-zero vals of this proc's seqmat into ba */
4686     anzi = ai[arow+1] - ai[arow];
4687     aj   = a->j + ai[arow];
4688     aa   = a->a + ai[arow];
4689     nextaj = 0;
4690     for (j=0; nextaj<anzi; j++){
4691       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4692         ba_i[j] += aa[nextaj++];
4693       }
4694     }
4695 
4696     /* add received vals into ba */
4697     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4698       /* i-th row */
4699       if (i == *nextrow[k]) {
4700         anzi = *(nextai[k]+1) - *nextai[k];
4701         aj   = buf_rj[k] + *(nextai[k]);
4702         aa   = abuf_r[k] + *(nextai[k]);
4703         nextaj = 0;
4704         for (j=0; nextaj<anzi; j++){
4705           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4706             ba_i[j] += aa[nextaj++];
4707           }
4708         }
4709         nextrow[k]++; nextai[k]++;
4710       }
4711     }
4712     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4713   }
4714   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4715   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4716 
4717   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4718   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4719   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4720   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4721   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4722   PetscFunctionReturn(0);
4723 }
4724 
4725 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4726 
4727 #undef __FUNCT__
4728 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4729 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4730 {
4731   PetscErrorCode       ierr;
4732   Mat                  B_mpi;
4733   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4734   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4735   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4736   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4737   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4738   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4739   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4740   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4741   MPI_Status           *status;
4742   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4743   PetscBT              lnkbt;
4744   Mat_Merge_SeqsToMPI  *merge;
4745   PetscContainer       container;
4746 
4747   PetscFunctionBegin;
4748   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4749 
4750   /* make sure it is a PETSc comm */
4751   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4752   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4753   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4754 
4755   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4756   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4757 
4758   /* determine row ownership */
4759   /*---------------------------------------------------------*/
4760   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4761   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4762   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4763   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4764   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4765   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4766   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4767 
4768   m      = merge->rowmap->n;
4769   M      = merge->rowmap->N;
4770   owners = merge->rowmap->range;
4771 
4772   /* determine the number of messages to send, their lengths */
4773   /*---------------------------------------------------------*/
4774   len_s  = merge->len_s;
4775 
4776   len = 0;  /* length of buf_si[] */
4777   merge->nsend = 0;
4778   for (proc=0; proc<size; proc++){
4779     len_si[proc] = 0;
4780     if (proc == rank){
4781       len_s[proc] = 0;
4782     } else {
4783       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4784       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4785     }
4786     if (len_s[proc]) {
4787       merge->nsend++;
4788       nrows = 0;
4789       for (i=owners[proc]; i<owners[proc+1]; i++){
4790         if (ai[i+1] > ai[i]) nrows++;
4791       }
4792       len_si[proc] = 2*(nrows+1);
4793       len += len_si[proc];
4794     }
4795   }
4796 
4797   /* determine the number and length of messages to receive for ij-structure */
4798   /*-------------------------------------------------------------------------*/
4799   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4800   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4801 
4802   /* post the Irecv of j-structure */
4803   /*-------------------------------*/
4804   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4805   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4806 
4807   /* post the Isend of j-structure */
4808   /*--------------------------------*/
4809   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4810 
4811   for (proc=0, k=0; proc<size; proc++){
4812     if (!len_s[proc]) continue;
4813     i = owners[proc];
4814     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4815     k++;
4816   }
4817 
4818   /* receives and sends of j-structure are complete */
4819   /*------------------------------------------------*/
4820   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4821   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4822 
4823   /* send and recv i-structure */
4824   /*---------------------------*/
4825   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4826   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4827 
4828   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4829   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4830   for (proc=0,k=0; proc<size; proc++){
4831     if (!len_s[proc]) continue;
4832     /* form outgoing message for i-structure:
4833          buf_si[0]:                 nrows to be sent
4834                [1:nrows]:           row index (global)
4835                [nrows+1:2*nrows+1]: i-structure index
4836     */
4837     /*-------------------------------------------*/
4838     nrows = len_si[proc]/2 - 1;
4839     buf_si_i    = buf_si + nrows+1;
4840     buf_si[0]   = nrows;
4841     buf_si_i[0] = 0;
4842     nrows = 0;
4843     for (i=owners[proc]; i<owners[proc+1]; i++){
4844       anzi = ai[i+1] - ai[i];
4845       if (anzi) {
4846         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4847         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4848         nrows++;
4849       }
4850     }
4851     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4852     k++;
4853     buf_si += len_si[proc];
4854   }
4855 
4856   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4857   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4858 
4859   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4860   for (i=0; i<merge->nrecv; i++){
4861     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4862   }
4863 
4864   ierr = PetscFree(len_si);CHKERRQ(ierr);
4865   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4866   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4867   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4868   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4869   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4870   ierr = PetscFree(status);CHKERRQ(ierr);
4871 
4872   /* compute a local seq matrix in each processor */
4873   /*----------------------------------------------*/
4874   /* allocate bi array and free space for accumulating nonzero column info */
4875   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4876   bi[0] = 0;
4877 
4878   /* create and initialize a linked list */
4879   nlnk = N+1;
4880   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4881 
4882   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4883   len = 0;
4884   len  = ai[owners[rank+1]] - ai[owners[rank]];
4885   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4886   current_space = free_space;
4887 
4888   /* determine symbolic info for each local row */
4889   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4890 
4891   for (k=0; k<merge->nrecv; k++){
4892     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4893     nrows = *buf_ri_k[k];
4894     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4895     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4896   }
4897 
4898   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4899   len = 0;
4900   for (i=0;i<m;i++) {
4901     bnzi   = 0;
4902     /* add local non-zero cols of this proc's seqmat into lnk */
4903     arow   = owners[rank] + i;
4904     anzi   = ai[arow+1] - ai[arow];
4905     aj     = a->j + ai[arow];
4906     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4907     bnzi += nlnk;
4908     /* add received col data into lnk */
4909     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4910       if (i == *nextrow[k]) { /* i-th row */
4911         anzi = *(nextai[k]+1) - *nextai[k];
4912         aj   = buf_rj[k] + *nextai[k];
4913         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4914         bnzi += nlnk;
4915         nextrow[k]++; nextai[k]++;
4916       }
4917     }
4918     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4919 
4920     /* if free space is not available, make more free space */
4921     if (current_space->local_remaining<bnzi) {
4922       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4923       nspacedouble++;
4924     }
4925     /* copy data into free space, then initialize lnk */
4926     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4927     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4928 
4929     current_space->array           += bnzi;
4930     current_space->local_used      += bnzi;
4931     current_space->local_remaining -= bnzi;
4932 
4933     bi[i+1] = bi[i] + bnzi;
4934   }
4935 
4936   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4937 
4938   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4939   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4940   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4941 
4942   /* create symbolic parallel matrix B_mpi */
4943   /*---------------------------------------*/
4944   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4945   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4946   if (n==PETSC_DECIDE) {
4947     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4948   } else {
4949     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4950   }
4951   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4952   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4953   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4954   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4955   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4956 
4957   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4958   B_mpi->assembled     = PETSC_FALSE;
4959   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4960   merge->bi            = bi;
4961   merge->bj            = bj;
4962   merge->buf_ri        = buf_ri;
4963   merge->buf_rj        = buf_rj;
4964   merge->coi           = PETSC_NULL;
4965   merge->coj           = PETSC_NULL;
4966   merge->owners_co     = PETSC_NULL;
4967 
4968   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4969 
4970   /* attach the supporting struct to B_mpi for reuse */
4971   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4972   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4973   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4974   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4975   *mpimat = B_mpi;
4976 
4977   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4978   PetscFunctionReturn(0);
4979 }
4980 
4981 #undef __FUNCT__
4982 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4983 /*@C
4984       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4985                  matrices from each processor
4986 
4987     Collective on MPI_Comm
4988 
4989    Input Parameters:
4990 +    comm - the communicator the parallel matrix will live on
4991 .    seqmat - the input sequential matrix (one per process)
4992 .    m - number of local rows (or PETSC_DECIDE)
4993 .    n - number of local columns (or PETSC_DECIDE)
4994 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4995 
4996    Output Parameter:
4997 .    mpimat - the parallel matrix generated
4998 
4999     Level: advanced
5000 
5001    Notes:
5002      The dimensions of the sequential matrix in each processor MUST be the same.
5003      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5004      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
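
   A minimal usage sketch, assuming each process holds a SeqAIJ matrix Aseq of the
   same global dimensions (error checking omitted):

.vb
     Mat Apar;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);
.ve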
5005 @*/
5006 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5007 {
5008   PetscErrorCode   ierr;
5009   PetscMPIInt     size;
5010 
5011   PetscFunctionBegin;
5012   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5013   if (size == 1){
5014      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5015      if (scall == MAT_INITIAL_MATRIX){
5016        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5017      } else {
5018        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5019      }
5020      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5021      PetscFunctionReturn(0);
5022   }
5023   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5024   if (scall == MAT_INITIAL_MATRIX){
5025     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5026   }
5027   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5028   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5029   PetscFunctionReturn(0);
5030 }
5031 
5032 #undef __FUNCT__
5033 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5034 /*@
5035      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5036           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5037           with MatGetSize()
5038 
5039     Not Collective
5040 
5041    Input Parameters:
5042 +    A - the matrix
5043 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5044 
5045    Output Parameter:
5046 .    A_loc - the local sequential matrix generated
5047 
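   A minimal usage sketch (error checking omitted):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... use Aloc ... */
     MatDestroy(&Aloc);
.ve
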
5048     Level: developer
5049 
5050 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5051 
5052 @*/
5053 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5054 {
5055   PetscErrorCode  ierr;
5056   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5057   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5058   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5059   MatScalar       *aa=a->a,*ba=b->a,*cam;
5060   PetscScalar     *ca;
5061   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5062   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5063   PetscBool       match;
5064 
5065   PetscFunctionBegin;
5066   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5067   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5068   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5069   if (scall == MAT_INITIAL_MATRIX){
5070     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5071     ci[0] = 0;
5072     for (i=0; i<am; i++){
5073       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5074     }
5075     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5076     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5077     k = 0;
5078     for (i=0; i<am; i++) {
5079       ncols_o = bi[i+1] - bi[i];
5080       ncols_d = ai[i+1] - ai[i];
5081       /* off-diagonal portion of A */
5082       for (jo=0; jo<ncols_o; jo++) {
5083         col = cmap[*bj];
5084         if (col >= cstart) break;
5085         cj[k]   = col; bj++;
5086         ca[k++] = *ba++;
5087       }
5088       /* diagonal portion of A */
5089       for (j=0; j<ncols_d; j++) {
5090         cj[k]   = cstart + *aj++;
5091         ca[k++] = *aa++;
5092       }
5093       /* off-diagonal portion of A */
5094       for (j=jo; j<ncols_o; j++) {
5095         cj[k]   = cmap[*bj++];
5096         ca[k++] = *ba++;
5097       }
5098     }
5099     /* put together the new matrix */
5100     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5101     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5102     /* Since these are PETSc arrays, change flags to free them as necessary. */
5103     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5104     mat->free_a  = PETSC_TRUE;
5105     mat->free_ij = PETSC_TRUE;
5106     mat->nonew   = 0;
5107   } else if (scall == MAT_REUSE_MATRIX){
5108     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5109     ci = mat->i; cj = mat->j; cam = mat->a;
5110     for (i=0; i<am; i++) {
5111       /* off-diagonal portion of A */
5112       ncols_o = bi[i+1] - bi[i];
5113       for (jo=0; jo<ncols_o; jo++) {
5114         col = cmap[*bj];
5115         if (col >= cstart) break;
5116         *cam++ = *ba++; bj++;
5117       }
5118       /* diagonal portion of A */
5119       ncols_d = ai[i+1] - ai[i];
5120       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5121       /* off-diagonal portion of A */
5122       for (j=jo; j<ncols_o; j++) {
5123         *cam++ = *ba++; bj++;
5124       }
5125     }
5126   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5127   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5128   PetscFunctionReturn(0);
5129 }
5130 
5131 #undef __FUNCT__
5132 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5133 /*@C
5134      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5135 
5136     Not Collective
5137 
5138    Input Parameters:
5139 +    A - the matrix
5140 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5141 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5142 
5143    Output Parameter:
5144 .    A_loc - the local sequential matrix generated
5145 
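   A minimal usage sketch with PETSC_NULL row and column index sets (error
   checking omitted):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);
.ve
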
5146     Level: developer
5147 
5148 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5149 
5150 @*/
5151 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5152 {
5153   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5154   PetscErrorCode    ierr;
5155   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5156   IS                isrowa,iscola;
5157   Mat               *aloc;
5158   PetscBool       match;
5159 
5160   PetscFunctionBegin;
5161   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5162   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5163   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5164   if (!row){
5165     start = A->rmap->rstart; end = A->rmap->rend;
5166     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5167   } else {
5168     isrowa = *row;
5169   }
5170   if (!col){
5171     start = A->cmap->rstart;
5172     cmap  = a->garray;
5173     nzA   = a->A->cmap->n;
5174     nzB   = a->B->cmap->n;
5175     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5176     ncols = 0;
5177     for (i=0; i<nzB; i++) {
5178       if (cmap[i] < start) idx[ncols++] = cmap[i];
5179       else break;
5180     }
5181     imark = i;
5182     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5183     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5184     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5185   } else {
5186     iscola = *col;
5187   }
5188   if (scall != MAT_INITIAL_MATRIX){
5189     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5190     aloc[0] = *A_loc;
5191   }
5192   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5193   *A_loc = aloc[0];
5194   ierr = PetscFree(aloc);CHKERRQ(ierr);
5195   if (!row){
5196     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5197   }
5198   if (!col){
5199     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5200   }
5201   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5202   PetscFunctionReturn(0);
5203 }
5204 
5205 #undef __FUNCT__
5206 #define __FUNCT__ "MatGetBrowsOfAcols"
5207 /*@C
5208     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5209 
5210     Collective on Mat
5211 
5212    Input Parameters:
5213 +    A,B - the matrices in mpiaij format
5214 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5215 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5216 
5217    Output Parameter:
5218 +    rowb, colb - index sets of rows and columns of B to extract
5219 -    B_seq - the sequential matrix generated
5220 
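   A minimal usage sketch for the initial call; the routine creates the index sets,
   which can be reused later with MAT_REUSE_MATRIX (error checking omitted):

.vb
     IS  rowb,colb;
     Mat Bseq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
.ve
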
5221     Level: developer
5222 
5223 @*/
5224 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5225 {
5226   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5227   PetscErrorCode    ierr;
5228   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5229   IS                isrowb,iscolb;
5230   Mat               *bseq=PETSC_NULL;
5231 
5232   PetscFunctionBegin;
5233   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5234     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5235   }
5236   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5237 
5238   if (scall == MAT_INITIAL_MATRIX){
5239     start = A->cmap->rstart;
5240     cmap  = a->garray;
5241     nzA   = a->A->cmap->n;
5242     nzB   = a->B->cmap->n;
5243     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5244     ncols = 0;
5245     for (i=0; i<nzB; i++) {  /* row < local row index */
5246       if (cmap[i] < start) idx[ncols++] = cmap[i];
5247       else break;
5248     }
5249     imark = i;
5250     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5251     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5252     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5253     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5254   } else {
5255     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5256     isrowb = *rowb; iscolb = *colb;
5257     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5258     bseq[0] = *B_seq;
5259   }
5260   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5261   *B_seq = bseq[0];
5262   ierr = PetscFree(bseq);CHKERRQ(ierr);
5263   if (!rowb){
5264     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5265   } else {
5266     *rowb = isrowb;
5267   }
5268   if (!colb){
5269     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5270   } else {
5271     *colb = iscolb;
5272   }
5273   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5274   PetscFunctionReturn(0);
5275 }
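/*
   Editorial usage sketch for MatGetBrowsOfAcols() (variable names are illustrative; A and B
   are assumed to be assembled MATMPIAIJ matrices with A->cmap matching B->rmap):

     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat Bseq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ... use Bseq; B's numerical values may change, but not its nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/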
5276 
5277 #undef __FUNCT__
5278 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5279 /*
5280     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
5281     columns of the OFF-DIAGONAL portion of the local part of A
5282 
5283     Collective on Mat
5284 
5285    Input Parameters:
5286 +    A,B - the matrices in mpiaij format
5287 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5288 
5289    Output Parameters:
5290 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5291 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5292 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5293 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5294 
5295     Level: developer
5296 
5297 */
5298 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5299 {
5300   VecScatter_MPI_General *gen_to,*gen_from;
5301   PetscErrorCode         ierr;
5302   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5303   Mat_SeqAIJ             *b_oth;
5304   VecScatter             ctx=a->Mvctx;
5305   MPI_Comm               comm=((PetscObject)ctx)->comm;
5306   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5307   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5308   PetscScalar            *rvalues,*svalues;
5309   MatScalar              *b_otha,*bufa,*bufA;
5310   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5311   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5312   MPI_Status             *sstatus,rstatus;
5313   PetscMPIInt            jj;
5314   PetscInt               *cols,sbs,rbs;
5315   PetscScalar            *vals;
5316 
5317   PetscFunctionBegin;
5318   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5319     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5320   }
5321   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5322   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5323 
5324   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5325   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5326   rvalues  = gen_from->values; /* workspace used to hold the lengths of the rows to be received */
5327   svalues  = gen_to->values;   /* workspace used to hold the lengths of the rows to be sent */
5328   nrecvs   = gen_from->n;
5329   nsends   = gen_to->n;
5330 
5331   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5332   srow     = gen_to->indices;   /* local row index to be sent */
5333   sstarts  = gen_to->starts;
5334   sprocs   = gen_to->procs;
5335   sstatus  = gen_to->sstatus;
5336   sbs      = gen_to->bs;
5337   rstarts  = gen_from->starts;
5338   rprocs   = gen_from->procs;
5339   rbs      = gen_from->bs;
5340 
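  /*
     Editorial summary of the communication below: using the send/receive pattern of A's
     VecScatter (a->Mvctx), the needed rows of B are exchanged in three phases:
     (1) row lengths (building the i-array of B_oth), (2) column indices (the j-array),
     and (3) numerical values (the a-array).  Phases (1) and (2) run only for
     MAT_INITIAL_MATRIX; for MAT_REUSE_MATRIX only the a-array is re-communicated into
     the existing B_oth.
  */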
5341   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5342   if (scall == MAT_INITIAL_MATRIX){
5343     /* i-array */
5344     /*---------*/
5345     /*  post receives */
5346     for (i=0; i<nrecvs; i++){
5347       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5348       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5349       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5350     }
5351 
5352     /* pack the outgoing message */
5353     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5354     sstartsj[0] = 0;  rstartsj[0] = 0;
5355     len = 0; /* total length of j or a array to be sent */
5356     k = 0;
5357     for (i=0; i<nsends; i++){
5358       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5359       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5360       for (j=0; j<nrows; j++) {
5361         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5362         for (l=0; l<sbs; l++){
5363           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5364           rowlen[j*sbs+l] = ncols;
5365           len += ncols;
5366           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5367         }
5368         k++;
5369       }
5370       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5371       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5372     }
5373     /* recvs and sends of i-array are completed */
5374     i = nrecvs;
5375     while (i--) {
5376       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5377     }
5378     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5379 
5380     /* allocate buffers for sending j and a arrays */
5381     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5382     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5383 
5384     /* create i-array of B_oth */
5385     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5386     b_othi[0] = 0;
5387     len = 0; /* total length of j or a array to be received */
5388     k = 0;
5389     for (i=0; i<nrecvs; i++){
5390       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5391       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5392       for (j=0; j<nrows; j++) {
5393         b_othi[k+1] = b_othi[k] + rowlen[j];
5394         len += rowlen[j]; k++;
5395       }
5396       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in b_othj and b_otha */
5397     }
5398 
5399     /* allocate space for j and a arrays of B_oth */
5400     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5401     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5402 
5403     /* j-array */
5404     /*---------*/
5405     /*  post receives of j-array */
5406     for (i=0; i<nrecvs; i++){
5407       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5408       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5409     }
5410 
5411     /* pack the outgoing message j-array */
5412     k = 0;
5413     for (i=0; i<nsends; i++){
5414       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5415       bufJ = bufj+sstartsj[i];
5416       for (j=0; j<nrows; j++) {
5417         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5418         for (ll=0; ll<sbs; ll++){
5419           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5420           for (l=0; l<ncols; l++){
5421             *bufJ++ = cols[l];
5422           }
5423           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5424         }
5425       }
5426       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5427     }
5428 
5429     /* recvs and sends of j-array are completed */
5430     i = nrecvs;
5431     while (i--) {
5432       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5433     }
5434     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5435   } else if (scall == MAT_REUSE_MATRIX){
5436     sstartsj = *startsj_s;
5437     rstartsj = *startsj_r;
5438     bufa     = *bufa_ptr;
5439     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5440     b_otha   = b_oth->a;
5441   } else {
5442     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5443   }
5444 
5445   /* a-array */
5446   /*---------*/
5447   /*  post receives of a-array */
5448   for (i=0; i<nrecvs; i++){
5449     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5450     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5451   }
5452 
5453   /* pack the outgoing message a-array */
5454   k = 0;
5455   for (i=0; i<nsends; i++){
5456     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5457     bufA = bufa+sstartsj[i];
5458     for (j=0; j<nrows; j++) {
5459       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5460       for (ll=0; ll<sbs; ll++){
5461         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5462         for (l=0; l<ncols; l++){
5463           *bufA++ = vals[l];
5464         }
5465         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5466       }
5467     }
5468     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5469   }
5470   /* recvs and sends of a-array are completed */
5471   i = nrecvs;
5472   while (i--) {
5473     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5474   }
5475   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5476   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5477 
5478   if (scall == MAT_INITIAL_MATRIX){
5479     /* put together the new matrix */
5480     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5481 
5482     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5483     /* Since these are PETSc arrays, change flags to free them as necessary. */
5484     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5485     b_oth->free_a  = PETSC_TRUE;
5486     b_oth->free_ij = PETSC_TRUE;
5487     b_oth->nonew   = 0;
5488 
5489     ierr = PetscFree(bufj);CHKERRQ(ierr);
5490     if (!startsj_s || !bufa_ptr){
5491       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5492       ierr = PetscFree(bufa);CHKERRQ(ierr);
5493     } else {
5494       *startsj_s = sstartsj;
5495       *startsj_r = rstartsj;
5496       *bufa_ptr  = bufa;
5497     }
5498   }
5499   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5500   PetscFunctionReturn(0);
5501 }
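/*
   Editorial usage sketch for MatGetBrowsOfAoCols_MPIAIJ() (names are illustrative; this is
   an internal routine, typically driven from matrix-matrix product kernels):

     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa      = PETSC_NULL;
     Mat       Both;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ... B's values change but not its nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&Both);CHKERRQ(ierr);

   startsj_s and startsj_r were allocated together with PetscMalloc2() above, hence the
   single PetscFree2() here.
*/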
5502 
5503 #undef __FUNCT__
5504 #define __FUNCT__ "MatGetCommunicationStructs"
5505 /*@C
5506   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5507 
5508   Not Collective
5509 
5510   Input Parameter:
5511 . A - The matrix in mpiaij format
5512 
5513   Output Parameters:
5514 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5515 . colmap - A map from global column index to local index into lvec
5516 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5517 
5518   Level: developer
5519 
5520 @*/
5521 #if defined (PETSC_USE_CTABLE)
5522 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5523 #else
5524 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5525 #endif
5526 {
5527   Mat_MPIAIJ *a;
5528 
5529   PetscFunctionBegin;
5530   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5531   PetscValidPointer(lvec, 2);
5532   PetscValidPointer(colmap, 3);
5533   PetscValidPointer(multScatter, 4);
5534   a = (Mat_MPIAIJ *) A->data;
5535   if (lvec) *lvec = a->lvec;
5536   if (colmap) *colmap = a->colmap;
5537   if (multScatter) *multScatter = a->Mvctx;
5538   PetscFunctionReturn(0);
5539 }
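/*
   Editorial usage sketch for MatGetCommunicationStructs() (names are illustrative; the
   returned objects are borrowed references owned by the matrix and must not be destroyed
   by the caller):

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/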
5540 
5541 EXTERN_C_BEGIN
5542 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5543 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5544 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5545 EXTERN_C_END
5546 
5547 #undef __FUNCT__
5548 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5549 /*
5550     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5551 
5552                n                       p                          p
5553         (              )       (              )         (                  )
5554       m (      A       )  *  n (       B      )   =   m (         C        )
5555         (              )       (              )         (                  )
5556 
5557 */
5558 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5559 {
5560   PetscErrorCode     ierr;
5561   Mat                At,Bt,Ct;
5562 
5563   PetscFunctionBegin;
5564   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5565   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5566   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5567   ierr = MatDestroy(&At);CHKERRQ(ierr);
5568   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5569   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5570   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5571   PetscFunctionReturn(0);
5572 }
5573 
5574 #undef __FUNCT__
5575 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5576 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5577 {
5578   PetscErrorCode ierr;
5579   PetscInt       m=A->rmap->n,n=B->cmap->n;
5580   Mat            Cmat;
5581 
5582   PetscFunctionBegin;
5583   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5584   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5585   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5586   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5587   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5588   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5589   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5590   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5591   *C   = Cmat;
5592   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5593   PetscFunctionReturn(0);
5594 }
5595 
5596 /* ----------------------------------------------------------------*/
5597 #undef __FUNCT__
5598 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5599 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5600 {
5601   PetscErrorCode ierr;
5602 
5603   PetscFunctionBegin;
5604   if (scall == MAT_INITIAL_MATRIX){
5605     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5606   }
5607   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5608   PetscFunctionReturn(0);
5609 }
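/*
   Editorial usage sketch: the dense*AIJ product above is normally reached through the
   generic interface (Adense assumed MATMPIDENSE, Baij assumed MATMPIAIJ, with
   Adense->cmap matching Baij->rmap):

     Mat C;
     ierr = MatMatMult(Adense,Baij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(Adense,Baij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/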
5610 
5611 EXTERN_C_BEGIN
5612 #if defined(PETSC_HAVE_MUMPS)
5613 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5614 #endif
5615 #if defined(PETSC_HAVE_PASTIX)
5616 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5617 #endif
5618 #if defined(PETSC_HAVE_SUPERLU_DIST)
5619 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5620 #endif
5621 #if defined(PETSC_HAVE_CLIQUE)
5622 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5623 #endif
5624 EXTERN_C_END
5625 
5626 /*MC
5627    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5628 
5629    Options Database Keys:
5630 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5631 
5632   Level: beginner
5633 
5634 .seealso: MatCreateAIJ()
5635 M*/
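/*
   Editorial usage sketch for creating a MATMPIAIJ matrix (the preallocation counts 5 and 2
   are illustrative only; comm, m, n are assumed to be defined by the caller):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/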
5636 
5637 EXTERN_C_BEGIN
5638 #undef __FUNCT__
5639 #define __FUNCT__ "MatCreate_MPIAIJ"
5640 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5641 {
5642   Mat_MPIAIJ     *b;
5643   PetscErrorCode ierr;
5644   PetscMPIInt    size;
5645 
5646   PetscFunctionBegin;
5647   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5648   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5649   B->data         = (void*)b;
5650   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5651   B->assembled    = PETSC_FALSE;
5652   B->insertmode   = NOT_SET_VALUES;
5653   b->size         = size;
5654   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5655 
5656   /* build cache for off array entries formed */
5657   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5658   b->donotstash  = PETSC_FALSE;
5659   b->colmap      = 0;
5660   b->garray      = 0;
5661   b->roworiented = PETSC_TRUE;
5662 
5663   /* stuff used for matrix vector multiply */
5664   b->lvec      = PETSC_NULL;
5665   b->Mvctx     = PETSC_NULL;
5666 
5667   /* stuff for MatGetRow() */
5668   b->rowindices   = 0;
5669   b->rowvalues    = 0;
5670   b->getrowactive = PETSC_FALSE;
5671 
5672   /* flexible pointer used in CUSP/CUSPARSE classes */
5673   b->spptr        = PETSC_NULL;
5674 
5675 #if defined(PETSC_HAVE_MUMPS)
5676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5677                                      "MatGetFactor_aij_mumps",
5678                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5679 #endif
5680 #if defined(PETSC_HAVE_PASTIX)
5681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5682 					   "MatGetFactor_mpiaij_pastix",
5683 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5684 #endif
5685 #if defined(PETSC_HAVE_SUPERLU_DIST)
5686   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5687                                      "MatGetFactor_mpiaij_superlu_dist",
5688                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5689 #endif
5690 #if defined(PETSC_HAVE_CLIQUE)
5691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5692                                      "MatGetFactor_aij_clique",
5693                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5694 #endif
5695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5696                                      "MatStoreValues_MPIAIJ",
5697                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5699                                      "MatRetrieveValues_MPIAIJ",
5700                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5701   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5702 				     "MatGetDiagonalBlock_MPIAIJ",
5703                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5705 				     "MatIsTranspose_MPIAIJ",
5706 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5708 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5709 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5710   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5711 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5712 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5713   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5714 				     "MatDiagonalScaleLocal_MPIAIJ",
5715 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5716   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5717                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5718                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5719   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5720                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5721                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5722   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5723                                      "MatConvert_MPIAIJ_MPISBAIJ",
5724                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5725   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5726                                      "MatMatMult_MPIDense_MPIAIJ",
5727                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5728   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5729                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5730                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5731   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5732                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5733                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5734   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5735   PetscFunctionReturn(0);
5736 }
5737 EXTERN_C_END
5738 
5739 #undef __FUNCT__
5740 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5741 /*@
5742      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5743          and "off-diagonal" parts of the matrix in CSR format.
5744 
5745    Collective on MPI_Comm
5746 
5747    Input Parameters:
5748 +  comm - MPI communicator
5749 .  m - number of local rows (Cannot be PETSC_DECIDE)
5750 .  n - number of local columns. This value should be the same as the local size used in creating the
5751        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5752        calculated if N is given). For square matrices n is almost always m.
5753 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5754 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5755 .   i - row indices for "diagonal" portion of matrix
5756 .   j - column indices
5757 .   a - matrix values
5758 .   oi - row indices for "off-diagonal" portion of matrix
5759 .   oj - column indices
5760 -   oa - matrix values
5761 
5762    Output Parameter:
5763 .   mat - the matrix
5764 
5765    Level: advanced
5766 
5767    Notes:
5768        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5769        must keep these arrays valid until the matrix has been destroyed, and is responsible for freeing them afterwards.
5770 
5771        The i and j indices are 0 based
5772 
5773        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5774 
5775        This sets local rows and cannot be used to set off-processor values.
5776 
5777        You cannot later use MatSetValues() to change values in this matrix.
5778 
5779 .keywords: matrix, aij, compressed row, sparse, parallel
5780 
5781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5782           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5783 @*/
5784 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5785 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5786 {
5787   PetscErrorCode ierr;
5788   Mat_MPIAIJ     *maij;
5789 
5790   PetscFunctionBegin;
5791   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5792   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5793   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5794   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5795   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5796   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5797   maij = (Mat_MPIAIJ*) (*mat)->data;
5798   maij->donotstash     = PETSC_TRUE;
5799   (*mat)->preallocated = PETSC_TRUE;
5800 
5801   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5802   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5803 
5804   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5805   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5806 
5807   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5808   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5809   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5810   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5811 
5812   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5813   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5814   PetscFunctionReturn(0);
5815 }
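/*
   Editorial usage sketch for MatCreateMPIAIJWithSplitArrays() on at least two processes
   (values and names are illustrative; rank and size are assumed to come from
   MPI_Comm_rank()/MPI_Comm_size() on comm).  Each process owns one row and one column;
   as the implementation above suggests, j[] indexes the n columns of the local diagonal
   block, while oj[] uses global column numbering since the off-diagonal block is created
   with the full global column width N:

     PetscInt    i[2]  = {0,1},j[1]  = {0};
     PetscScalar a[1]  = {2.0};
     PetscInt    oi[2] = {0,1},oj[1];
     PetscScalar oa[1] = {-1.0};
     Mat         A;

     oj[0] = (rank+1)%size;
     ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...
     ierr  = MatDestroy(&A);CHKERRQ(ierr);

   The arrays may be freed only after A has been destroyed, as noted in the Notes above.
*/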
5816 
5817 /*
5818     Special version for direct calls from Fortran
5819 */
5820 #include <petsc-private/fortranimpl.h>
5821 
5822 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5823 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5824 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5825 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5826 #endif
5827 
5828 /* Change these macros so can be used in void function */
5829 #undef CHKERRQ
5830 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5831 #undef SETERRQ2
5832 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5833 #undef SETERRQ3
5834 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5835 #undef SETERRQ
5836 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5837 
5838 EXTERN_C_BEGIN
5839 #undef __FUNCT__
5840 #define __FUNCT__ "matsetvaluesmpiaij_"
5841 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5842 {
5843   Mat             mat = *mmat;
5844   PetscInt        m = *mm, n = *mn;
5845   InsertMode      addv = *maddv;
5846   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5847   PetscScalar     value;
5848   PetscErrorCode  ierr;
5849 
5850   MatCheckPreallocated(mat,1);
5851   if (mat->insertmode == NOT_SET_VALUES) {
5852     mat->insertmode = addv;
5853   }
5854 #if defined(PETSC_USE_DEBUG)
5855   else if (mat->insertmode != addv) {
5856     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5857   }
5858 #endif
5859   {
5860   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5861   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5862   PetscBool       roworiented = aij->roworiented;
5863 
5864   /* Some Variables required in the macro */
5865   Mat             A = aij->A;
5866   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5867   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5868   MatScalar       *aa = a->a;
5869   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5870   Mat             B = aij->B;
5871   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5872   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5873   MatScalar       *ba = b->a;
5874 
5875   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5876   PetscInt        nonew = a->nonew;
5877   MatScalar       *ap1,*ap2;
5878 
5879   PetscFunctionBegin;
5880   for (i=0; i<m; i++) {
5881     if (im[i] < 0) continue;
5882 #if defined(PETSC_USE_DEBUG)
5883     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5884 #endif
5885     if (im[i] >= rstart && im[i] < rend) {
5886       row      = im[i] - rstart;
5887       lastcol1 = -1;
5888       rp1      = aj + ai[row];
5889       ap1      = aa + ai[row];
5890       rmax1    = aimax[row];
5891       nrow1    = ailen[row];
5892       low1     = 0;
5893       high1    = nrow1;
5894       lastcol2 = -1;
5895       rp2      = bj + bi[row];
5896       ap2      = ba + bi[row];
5897       rmax2    = bimax[row];
5898       nrow2    = bilen[row];
5899       low2     = 0;
5900       high2    = nrow2;
5901 
5902       for (j=0; j<n; j++) {
5903         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5904         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5905         if (in[j] >= cstart && in[j] < cend){
5906           col = in[j] - cstart;
5907           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5908         } else if (in[j] < 0) continue;
5909 #if defined(PETSC_USE_DEBUG)
5910         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5911 #endif
5912         else {
5913           if (mat->was_assembled) {
5914             if (!aij->colmap) {
5915               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5916             }
5917 #if defined (PETSC_USE_CTABLE)
5918             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5919 	    col--;
5920 #else
5921             col = aij->colmap[in[j]] - 1;
5922 #endif
5923             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5924               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5925               col =  in[j];
5926               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5927               B = aij->B;
5928               b = (Mat_SeqAIJ*)B->data;
5929               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5930               rp2      = bj + bi[row];
5931               ap2      = ba + bi[row];
5932               rmax2    = bimax[row];
5933               nrow2    = bilen[row];
5934               low2     = 0;
5935               high2    = nrow2;
5936               bm       = aij->B->rmap->n;
5937               ba = b->a;
5938             }
5939           } else col = in[j];
5940           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5941         }
5942       }
5943     } else {
5944       if (!aij->donotstash) {
5945         if (roworiented) {
5946           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5947         } else {
5948           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5949         }
5950       }
5951     }
5952   }}
5953   PetscFunctionReturnVoid();
5954 }
5955 EXTERN_C_END
5956 
5957