xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 43f9ffa7fee3e506d2e7ed45ea09f47c334a6d12)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
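/*
   A minimal usage sketch (not taken from this file; the matrix size n and the
   nonzero estimates 5 and 2 are illustrative assumptions): create an AIJ matrix
   and call both preallocation routines, as recommended above, so the same code
   runs unchanged on one or many processes.  Whichever routine does not match the
   communicator is ignored.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/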
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
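/*
   A sketch of selecting this format at run time (assuming the application calls
   MatSetFromOptions() on the matrix; the executable name is hypothetical):

     ./app -mat_type aijcrl
*/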
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
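/*
   A minimal calling sketch under the restrictions noted above (gmat is a square
   MATSEQAIJ, read only on process 0 by the code below; m is the number of rows
   this process is to own; the names are illustrative):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/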
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal nonzeros in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal nonzeros in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 has an order N integer array) but is fast to access.
354 */
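/*
   For reference, a sketch of how this map is consulted later (the same lookup
   pattern appears in MatSetValues_MPIAIJ() below; gcol is a hypothetical global
   column index, lcol becomes the local off-diagonal column index, or -1 if the
   column is not present):

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/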
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if the nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled, thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal entry, which would
865      allocate new memory, causing lots of mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: symmetric diagonal block */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
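
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   this binary path is what MatView() reaches for a MATMPIAIJ matrix when the
   viewer has type PETSCVIEWERBINARY.  Error checking is abbreviated and the
   file name "amat.dat" is arbitrary.

     Mat         A;                // an assembled MATMPIAIJ matrix
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"amat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file written here can be read back on any number of processes with
   MatLoad() from a viewer opened with FILE_MODE_READ.
*/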
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscBool  inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
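
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   only the local (processor-block) sweeps handled above are supported; a
   fully coupled parallel SOR is rejected with PETSC_ERR_SUP.  Assuming an
   assembled matrix A and conforming vectors b and x:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   Each outer iteration scatters the current x into lvec, forms bb1 = bb - B*x
   using the off-diagonal block, and then applies the sequential SOR kernel to
   the diagonal block.
*/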
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   MPI_Comm       comm;
1725   PetscInt       first,local_rowsize,local_colsize;
1726   const PetscInt *rows;
1727   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1732   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1733   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1734   /* collect the global row permutation and invert it */
1735   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1736   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1737   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1738   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1739   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1740   /* get the local target indices */
1741   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1742   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1743   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1744   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1745   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1746   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1747   /* the column permutation is so much easier;
1748      make a local version of 'colp' and invert it */
1749   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1750   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1751   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1752   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1753   /* now we just get the submatrix */
1754   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1755   /* clean up */
1756   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1757   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
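
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   rowp and colp may be serial index sets, since the routine above makes them
   collective with ISOnComm()/ISAllGather().  Assuming both describe
   permutations of the global row and column indices:

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/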
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1764 {
1765   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1766   Mat            A = mat->A,B = mat->B;
1767   PetscErrorCode ierr;
1768   PetscReal      isend[5],irecv[5];
1769 
1770   PetscFunctionBegin;
1771   info->block_size     = 1.0;
1772   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1773   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1774   isend[3] = info->memory;  isend[4] = info->mallocs;
1775   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1776   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1777   isend[3] += info->memory;  isend[4] += info->mallocs;
1778   if (flag == MAT_LOCAL) {
1779     info->nz_used      = isend[0];
1780     info->nz_allocated = isend[1];
1781     info->nz_unneeded  = isend[2];
1782     info->memory       = isend[3];
1783     info->mallocs      = isend[4];
1784   } else if (flag == MAT_GLOBAL_MAX) {
1785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   } else if (flag == MAT_GLOBAL_SUM) {
1792     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1793     info->nz_used      = irecv[0];
1794     info->nz_allocated = irecv[1];
1795     info->nz_unneeded  = irecv[2];
1796     info->memory       = irecv[3];
1797     info->mallocs      = irecv[4];
1798   }
1799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1800   info->fill_ratio_needed = 0;
1801   info->factor_mallocs    = 0;
1802   PetscFunctionReturn(0);
1803 }
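
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   MAT_LOCAL reports this process only, while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
   reduce the five statistics over the matrix communicator as shown above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/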
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatSetOption_MPIAIJ"
1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1808 {
1809   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   switch (op) {
1814   case MAT_NEW_NONZERO_LOCATIONS:
1815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1816   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1817   case MAT_KEEP_NONZERO_PATTERN:
1818   case MAT_NEW_NONZERO_LOCATION_ERR:
1819   case MAT_USE_INODES:
1820   case MAT_IGNORE_ZERO_ENTRIES:
1821     MatCheckPreallocated(A,1);
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_ROW_ORIENTED:
1826     a->roworiented = flg;
1827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1828     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1829     break;
1830   case MAT_NEW_DIAGONALS:
1831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1832     break;
1833   case MAT_IGNORE_OFF_PROC_ENTRIES:
1834     a->donotstash = flg;
1835     break;
1836   case MAT_SPD:
1837     A->spd_set                         = PETSC_TRUE;
1838     A->spd                             = flg;
1839     if (flg) {
1840       A->symmetric                     = PETSC_TRUE;
1841       A->structurally_symmetric        = PETSC_TRUE;
1842       A->symmetric_set                 = PETSC_TRUE;
1843       A->structurally_symmetric_set    = PETSC_TRUE;
1844     }
1845     break;
1846   case MAT_SYMMETRIC:
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_STRUCTURALLY_SYMMETRIC:
1850     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1851     break;
1852   case MAT_HERMITIAN:
1853     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1854     break;
1855   case MAT_SYMMETRY_ETERNAL:
1856     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1857     break;
1858   default:
1859     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1860   }
1861   PetscFunctionReturn(0);
1862 }
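
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   most options are simply forwarded to the sequential diagonal and
   off-diagonal blocks; MAT_IGNORE_OFF_PROC_ENTRIES is consumed here and makes
   subsequent MatSetValues() calls drop entries destined for other processes
   instead of stashing and communicating them.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/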
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatGetRow_MPIAIJ"
1866 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1867 {
1868   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1869   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1870   PetscErrorCode ierr;
1871   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1872   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1873   PetscInt       *cmap,*idx_p;
1874 
1875   PetscFunctionBegin;
1876   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1877   mat->getrowactive = PETSC_TRUE;
1878 
1879   if (!mat->rowvalues && (idx || v)) {
1880     /*
1881         allocate enough space to hold information from the longest row.
1882     */
1883     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1884     PetscInt   max = 1,tmp;
1885     for (i=0; i<matin->rmap->n; i++) {
1886       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1887       if (max < tmp) { max = tmp; }
1888     }
1889     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1890   }
1891 
1892   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1893   lrow = row - rstart;
1894 
1895   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1896   if (!v)   {pvA = 0; pvB = 0;}
1897   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1898   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1899   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1900   nztot = nzA + nzB;
1901 
1902   cmap  = mat->garray;
1903   if (v  || idx) {
1904     if (nztot) {
1905       /* Sort by increasing column numbers, assuming A and B already sorted */
1906       PetscInt imark = -1;
1907       if (v) {
1908         *v = v_p = mat->rowvalues;
1909         for (i=0; i<nzB; i++) {
1910           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1911           else break;
1912         }
1913         imark = i;
1914         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1915         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1916       }
1917       if (idx) {
1918         *idx = idx_p = mat->rowindices;
1919         if (imark > -1) {
1920           for (i=0; i<imark; i++) {
1921             idx_p[i] = cmap[cworkB[i]];
1922           }
1923         } else {
1924           for (i=0; i<nzB; i++) {
1925             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1926             else break;
1927           }
1928           imark = i;
1929         }
1930         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1931         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1932       }
1933     } else {
1934       if (idx) *idx = 0;
1935       if (v)   *v   = 0;
1936     }
1937   }
1938   *nz = nztot;
1939   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1940   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
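
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   only locally owned rows may be requested, and each MatGetRow() must be
   matched by MatRestoreRow() before the next row is fetched, since the merged
   row is staged in the single rowvalues/rowindices workspace.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... use the ncols global column indices and values ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/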
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1946 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1947 {
1948   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1949 
1950   PetscFunctionBegin;
1951   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1952   aij->getrowactive = PETSC_FALSE;
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatNorm_MPIAIJ"
1958 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1959 {
1960   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1961   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1962   PetscErrorCode ierr;
1963   PetscInt       i,j,cstart = mat->cmap->rstart;
1964   PetscReal      sum = 0.0;
1965   MatScalar      *v;
1966 
1967   PetscFunctionBegin;
1968   if (aij->size == 1) {
1969     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1970   } else {
1971     if (type == NORM_FROBENIUS) {
1972       v = amat->a;
1973       for (i=0; i<amat->nz; i++) {
1974 #if defined(PETSC_USE_COMPLEX)
1975         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1976 #else
1977         sum += (*v)*(*v); v++;
1978 #endif
1979       }
1980       v = bmat->a;
1981       for (i=0; i<bmat->nz; i++) {
1982 #if defined(PETSC_USE_COMPLEX)
1983         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1984 #else
1985         sum += (*v)*(*v); v++;
1986 #endif
1987       }
1988       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1989       *norm = PetscSqrtReal(*norm);
1990     } else if (type == NORM_1) { /* max column norm */
1991       PetscReal *tmp,*tmp2;
1992       PetscInt  *jj,*garray = aij->garray;
1993       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1994       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1995       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1996       *norm = 0.0;
1997       v = amat->a; jj = amat->j;
1998       for (j=0; j<amat->nz; j++) {
1999         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2000       }
2001       v = bmat->a; jj = bmat->j;
2002       for (j=0; j<bmat->nz; j++) {
2003         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2004       }
2005       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2006       for (j=0; j<mat->cmap->N; j++) {
2007         if (tmp2[j] > *norm) *norm = tmp2[j];
2008       }
2009       ierr = PetscFree(tmp);CHKERRQ(ierr);
2010       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2011     } else if (type == NORM_INFINITY) { /* max row norm */
2012       PetscReal ntemp = 0.0;
2013       for (j=0; j<aij->A->rmap->n; j++) {
2014         v = amat->a + amat->i[j];
2015         sum = 0.0;
2016         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2017           sum += PetscAbsScalar(*v); v++;
2018         }
2019         v = bmat->a + bmat->i[j];
2020         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2021           sum += PetscAbsScalar(*v); v++;
2022         }
2023         if (sum > ntemp) ntemp = sum;
2024       }
2025       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2026     } else {
2027       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2028     }
2029   }
2030   PetscFunctionReturn(0);
2031 }
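
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   NORM_FROBENIUS sums |a_ij|^2 over both local blocks and takes the square
   root after a global reduction, NORM_1 is the maximum column sum and
   NORM_INFINITY the maximum row sum; the 2-norm is not supported.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/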
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatTranspose_MPIAIJ"
2035 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2036 {
2037   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2038   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2039   PetscErrorCode ierr;
2040   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2041   PetscInt       cstart=A->cmap->rstart,ncol;
2042   Mat            B;
2043   MatScalar      *array;
2044 
2045   PetscFunctionBegin;
2046   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2047 
2048   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2049   ai = Aloc->i; aj = Aloc->j;
2050   bi = Bloc->i; bj = Bloc->j;
2051   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2052     PetscInt *d_nnz,*g_nnz,*o_nnz;
2053     PetscSFNode *oloc;
2054     PetscSF sf;
2055 
2056     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2057     /* compute d_nnz for preallocation */
2058     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2059     for (i=0; i<ai[ma]; i++) {
2060       d_nnz[aj[i]] ++;
2061       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2062     }
2063     /* compute local off-diagonal contributions */
2064     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2065     /* map those to global */
2066     for (i=0; i<nb; i++) {
2067       PetscInt gcol = a->garray[i],colowner;
2068       ierr = PetscLayoutFindOwner(A->cmap,a->garray[i],&colowner);CHKERRQ(ierr);
2069       oloc[i].rank  = colowner;
2070       oloc[i].index = gcol - A->cmap->range[colowner];
2071     }
2072     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2073     ierr = PetscSFSetGraph(sf,na,nb,PETSC_NULL,PETSC_USE_POINTER,oloc,PETSC_USE_POINTER);CHKERRQ(ierr);
2074     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2075     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2076     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2077     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2078 
2079     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2080     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2081     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2082     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2083     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2084     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2085   } else {
2086     B = *matout;
2087     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2088     for (i=0; i<ai[ma]; i++){
2089       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2090     }
2091   }
2092 
2093   /* copy over the A part */
2094   array = Aloc->a;
2095   row = A->rmap->rstart;
2096   for (i=0; i<ma; i++) {
2097     ncol = ai[i+1]-ai[i];
2098     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2099     row++; array += ncol; aj += ncol;
2100   }
2101   aj = Aloc->j;
2102   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2103 
2104   /* copy over the B part */
2105   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2106   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2107   array = Bloc->a;
2108   row = A->rmap->rstart;
2109   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2110   cols_tmp = cols;
2111   for (i=0; i<mb; i++) {
2112     ncol = bi[i+1]-bi[i];
2113     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2114     row++; array += ncol; cols_tmp += ncol;
2115   }
2116   ierr = PetscFree(cols);CHKERRQ(ierr);
2117 
2118   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2119   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2120   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2121     *matout = B;
2122   } else {
2123     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2124   }
2125   PetscFunctionReturn(0);
2126 }
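
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   with MAT_INITIAL_MATRIX the transpose is preallocated from the column
   counts computed above (d_nnz/o_nnz, with the off-process part gathered
   through a PetscSF reduction) and then filled row block by row block with
   MatSetValues().

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/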
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2130 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2131 {
2132   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2133   Mat            a = aij->A,b = aij->B;
2134   PetscErrorCode ierr;
2135   PetscInt       s1,s2,s3;
2136 
2137   PetscFunctionBegin;
2138   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2139   if (rr) {
2140     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2141     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2142     /* Overlap communication with computation. */
2143     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2144   }
2145   if (ll) {
2146     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2147     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2148     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2149   }
2150   /* scale the diagonal block */
2151   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2152 
2153   if (rr) {
2154     /* Do a scatter end and then right scale the off-diagonal block */
2155     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2156     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2157   }
2158 
2159   PetscFunctionReturn(0);
2160 }
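
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   ll must conform to the row layout and rr to the column layout; the scatter
   of rr into lvec is overlapped with the left scaling above.  Either vector
   may be PETSC_NULL to skip that side.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);       // r follows the columns, l the rows
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);   // A <- diag(l) A diag(r)
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/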
2161 
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2164 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2165 {
2166   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2167   PetscErrorCode ierr;
2168 
2169   PetscFunctionBegin;
2170   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatEqual_MPIAIJ"
2176 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2177 {
2178   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2179   Mat            a,b,c,d;
2180   PetscBool      flg;
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   a = matA->A; b = matA->B;
2185   c = matB->A; d = matB->B;
2186 
2187   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2188   if (flg) {
2189     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2190   }
2191   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
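
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   the diagonal and off-diagonal blocks are compared locally and the results
   are combined with a logical-and reduction, so all processes obtain the same
   answer.

     PetscBool equal;
     ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);
*/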
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatCopy_MPIAIJ"
2197 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2201   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2202 
2203   PetscFunctionBegin;
2204   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2205   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2206     /* because of the column compression in the off-processor part of the matrix a->B,
2207        the number of columns in a->B and b->B may be different, hence we cannot call
2208        the MatCopy() directly on the two parts. If need be, we can provide a more
2209        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2210        then copying the submatrices */
2211     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2212   } else {
2213     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2214     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2215   }
2216   PetscFunctionReturn(0);
2217 }
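
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   the fast block-wise path requires B to hold the same nonzero pattern as A,
   which is most easily arranged by duplicating A first; otherwise the generic
   MatCopy_Basic() path is taken.

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/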
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatSetUp_MPIAIJ"
2221 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2222 {
2223   PetscErrorCode ierr;
2224 
2225   PetscFunctionBegin;
2226   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2232 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2233 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2234 {
2235   PetscInt          i,m=Y->rmap->N;
2236   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2237   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2238   const PetscInt    *xi = x->i,*yi = y->i;
2239 
2240   PetscFunctionBegin;
2241   /* Set the number of nonzeros in the new matrix */
2242   for (i=0; i<m; i++) {
2243     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2244     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2245     nnz[i] = 0;
2246     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2247       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2248       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2249       nnz[i]++;
2250     }
2251     for (; k<nzy; k++) nnz[i]++;
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatAXPY_MPIAIJ"
2258 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2259 {
2260   PetscErrorCode ierr;
2261   PetscInt       i;
2262   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2263   PetscBLASInt   bnz,one=1;
2264   Mat_SeqAIJ     *x,*y;
2265 
2266   PetscFunctionBegin;
2267   if (str == SAME_NONZERO_PATTERN) {
2268     PetscScalar alpha = a;
2269     x = (Mat_SeqAIJ *)xx->A->data;
2270     y = (Mat_SeqAIJ *)yy->A->data;
2271     bnz = PetscBLASIntCast(x->nz);
2272     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2273     x = (Mat_SeqAIJ *)xx->B->data;
2274     y = (Mat_SeqAIJ *)yy->B->data;
2275     bnz = PetscBLASIntCast(x->nz);
2276     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2277   } else if (str == SUBSET_NONZERO_PATTERN) {
2278     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2279 
2280     x = (Mat_SeqAIJ *)xx->B->data;
2281     y = (Mat_SeqAIJ *)yy->B->data;
2282     if (y->xtoy && y->XtoY != xx->B) {
2283       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2284       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2285     }
2286     if (!y->xtoy) { /* get xtoy */
2287       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2288       y->XtoY = xx->B;
2289       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2290     }
2291     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2292   } else {
2293     Mat B;
2294     PetscInt *nnz_d,*nnz_o;
2295     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2296     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2297     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2298     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2299     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2300     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2301     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2302     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2303     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2304     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2305     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2306     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2307     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2308     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2309   }
2310   PetscFunctionReturn(0);
2311 }
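
/*
   Editorial note -- illustrative usage sketch, not part of the source file:
   SAME_NONZERO_PATTERN reduces to a BLAS axpy on the stored values of both
   blocks, SUBSET_NONZERO_PATTERN scatters X into Y through the cached xtoy
   map, and DIFFERENT_NONZERO_PATTERN rebuilds Y with the merged preallocation
   computed above.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        // Y <- Y + 2*X
     ierr = MatAXPY(Y,-1.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  // pattern of Y may change
*/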
2312 
2313 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2314 
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatConjugate_MPIAIJ"
2317 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2318 {
2319 #if defined(PETSC_USE_COMPLEX)
2320   PetscErrorCode ierr;
2321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2325   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2326 #else
2327   PetscFunctionBegin;
2328 #endif
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatRealPart_MPIAIJ"
2334 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2335 {
2336   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2337   PetscErrorCode ierr;
2338 
2339   PetscFunctionBegin;
2340   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2341   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2347 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2348 {
2349   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2350   PetscErrorCode ierr;
2351 
2352   PetscFunctionBegin;
2353   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2354   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #ifdef PETSC_HAVE_PBGL
2359 
2360 #include <boost/parallel/mpi/bsp_process_group.hpp>
2361 #include <boost/graph/distributed/ilu_default_graph.hpp>
2362 #include <boost/graph/distributed/ilu_0_block.hpp>
2363 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2364 #include <boost/graph/distributed/petsc/interface.hpp>
2365 #include <boost/multi_array.hpp>
2366 #include <boost/parallel/distributed_property_map.hpp>
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2370 /*
2371   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2372 */
2373 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2374 {
2375   namespace petsc = boost::distributed::petsc;
2376 
2377   namespace graph_dist = boost::graph::distributed;
2378   using boost::graph::distributed::ilu_default::process_group_type;
2379   using boost::graph::ilu_permuted;
2380 
2381   PetscBool       row_identity, col_identity;
2382   PetscContainer  c;
2383   PetscInt        m, n, M, N;
2384   PetscErrorCode  ierr;
2385 
2386   PetscFunctionBegin;
2387   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2388   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2389   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2390   if (!row_identity || !col_identity) {
2391     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2392   }
2393 
2394   process_group_type pg;
2395   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2396   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2397   lgraph_type&   level_graph = *lgraph_p;
2398   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2399 
2400   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2401   ilu_permuted(level_graph);
2402 
2403   /* put together the new matrix */
2404   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2405   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2406   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2407   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2408   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2409   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2410   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2411   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2412 
2413   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2414   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2415   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2416   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2422 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2423 {
2424   PetscFunctionBegin;
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatSolve_MPIAIJ"
2430 /*
2431   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2432 */
2433 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2434 {
2435   namespace graph_dist = boost::graph::distributed;
2436 
2437   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2438   lgraph_type*   lgraph_p;
2439   PetscContainer c;
2440   PetscErrorCode ierr;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2444   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2445   ierr = VecCopy(b, x);CHKERRQ(ierr);
2446 
2447   PetscScalar* array_x;
2448   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2449   PetscInt sx;
2450   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2451 
2452   PetscScalar* array_b;
2453   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2454   PetscInt sb;
2455   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2456 
2457   lgraph_type&   level_graph = *lgraph_p;
2458   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2459 
2460   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2461   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2462                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2463 
2464   typedef boost::iterator_property_map<array_ref_type::iterator,
2465                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2466   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2467                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2468 
2469   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2470 
2471   PetscFunctionReturn(0);
2472 }
2473 #endif
2474 
2475 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2476   PetscInt       nzlocal,nsends,nrecvs;
2477   PetscMPIInt    *send_rank,*recv_rank;
2478   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2479   PetscScalar    *sbuf_a,**rbuf_a;
2480   PetscErrorCode (*Destroy)(Mat);
2481 } Mat_Redundant;
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2485 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2486 {
2487   PetscErrorCode       ierr;
2488   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2489   PetscInt             i;
2490 
2491   PetscFunctionBegin;
2492   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2493   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2494   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2495   for (i=0; i<redund->nrecvs; i++){
2496     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2497     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2498   }
2499   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2500   ierr = PetscFree(redund);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatDestroy_MatRedundant"
2506 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2507 {
2508   PetscErrorCode  ierr;
2509   PetscContainer  container;
2510   Mat_Redundant   *redund=PETSC_NULL;
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2514   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2515   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2516   A->ops->destroy = redund->Destroy;
2517   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2518   if (A->ops->destroy) {
2519     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2526 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2527 {
2528   PetscMPIInt    rank,size;
2529   MPI_Comm       comm=((PetscObject)mat)->comm;
2530   PetscErrorCode ierr;
2531   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2532   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2533   PetscInt       *rowrange=mat->rmap->range;
2534   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2535   Mat            A=aij->A,B=aij->B,C=*matredundant;
2536   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2537   PetscScalar    *sbuf_a;
2538   PetscInt       nzlocal=a->nz+b->nz;
2539   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2540   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2541   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2542   MatScalar      *aworkA,*aworkB;
2543   PetscScalar    *vals;
2544   PetscMPIInt    tag1,tag2,tag3,imdex;
2545   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2546                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2547   MPI_Status     recv_status,*send_status;
2548   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2549   PetscInt       **rbuf_j=PETSC_NULL;
2550   PetscScalar    **rbuf_a=PETSC_NULL;
2551   Mat_Redundant  *redund=PETSC_NULL;
2552   PetscContainer container;
2553 
2554   PetscFunctionBegin;
2555   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2556   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2557 
2558   if (reuse == MAT_REUSE_MATRIX) {
2559     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2560     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2561     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2562     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2563     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2564     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2565     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2566     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2567 
2568     nsends    = redund->nsends;
2569     nrecvs    = redund->nrecvs;
2570     send_rank = redund->send_rank;
2571     recv_rank = redund->recv_rank;
2572     sbuf_nz   = redund->sbuf_nz;
2573     rbuf_nz   = redund->rbuf_nz;
2574     sbuf_j    = redund->sbuf_j;
2575     sbuf_a    = redund->sbuf_a;
2576     rbuf_j    = redund->rbuf_j;
2577     rbuf_a    = redund->rbuf_a;
2578   }
2579 
2580   if (reuse == MAT_INITIAL_MATRIX){
2581     PetscMPIInt  subrank,subsize;
2582     PetscInt     nleftover,np_subcomm;
2583     /* get the destination processors' id send_rank, nsends and nrecvs */
2584     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2585     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2586     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2587     np_subcomm = size/nsubcomm;
2588     nleftover  = size - nsubcomm*np_subcomm;
2589     nsends = 0; nrecvs = 0;
2590     for (i=0; i<size; i++){ /* i=rank*/
2591       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2592         send_rank[nsends] = i; nsends++;
2593         recv_rank[nrecvs++] = i;
2594       }
2595     }
2596     if (rank >= size - nleftover){/* this proc is a leftover processor */
2597       i = size-nleftover-1;
2598       j = 0;
2599       while (j < nsubcomm - nleftover){
2600         send_rank[nsends++] = i;
2601         i--; j++;
2602       }
2603     }
2604 
2605     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2606       for (i=0; i<nleftover; i++){
2607         recv_rank[nrecvs++] = size-nleftover+i;
2608       }
2609     }
2610 
2611     /* allocate sbuf_j, sbuf_a */
2612     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2613     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2614     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2615   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2616 
2617   /* copy mat's local entries into the buffers */
2618   if (reuse == MAT_INITIAL_MATRIX){
2619     rownz_max = 0;
2620     rptr = sbuf_j;
2621     cols = sbuf_j + rend-rstart + 1;
2622     vals = sbuf_a;
2623     rptr[0] = 0;
2624     for (i=0; i<rend-rstart; i++){
2625       row = i + rstart;
2626       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2627       ncols  = nzA + nzB;
2628       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2629       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2630       /* load the column indices for this row into cols */
2631       lwrite = 0;
2632       for (l=0; l<nzB; l++) {
2633         if ((ctmp = bmap[cworkB[l]]) < cstart){
2634           vals[lwrite]   = aworkB[l];
2635           cols[lwrite++] = ctmp;
2636         }
2637       }
2638       for (l=0; l<nzA; l++){
2639         vals[lwrite]   = aworkA[l];
2640         cols[lwrite++] = cstart + cworkA[l];
2641       }
2642       for (l=0; l<nzB; l++) {
2643         if ((ctmp = bmap[cworkB[l]]) >= cend){
2644           vals[lwrite]   = aworkB[l];
2645           cols[lwrite++] = ctmp;
2646         }
2647       }
2648       vals += ncols;
2649       cols += ncols;
2650       rptr[i+1] = rptr[i] + ncols;
2651       if (rownz_max < ncols) rownz_max = ncols;
2652     }
2653     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2654   } else { /* only copy matrix values into sbuf_a */
2655     rptr = sbuf_j;
2656     vals = sbuf_a;
2657     rptr[0] = 0;
2658     for (i=0; i<rend-rstart; i++){
2659       row = i + rstart;
2660       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2661       ncols  = nzA + nzB;
2662       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2663       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2664       lwrite = 0;
2665       for (l=0; l<nzB; l++) {
2666         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2667       }
2668       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2669       for (l=0; l<nzB; l++) {
2670         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2671       }
2672       vals += ncols;
2673       rptr[i+1] = rptr[i] + ncols;
2674     }
2675   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else ... */
2676 
2677   /* send nzlocal to others, and recv other's nzlocal */
2678   /*--------------------------------------------------*/
2679   if (reuse == MAT_INITIAL_MATRIX){
2680     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2681     s_waits2 = s_waits3 + nsends;
2682     s_waits1 = s_waits2 + nsends;
2683     r_waits1 = s_waits1 + nsends;
2684     r_waits2 = r_waits1 + nrecvs;
2685     r_waits3 = r_waits2 + nrecvs;
2686   } else {
2687     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2688     r_waits3 = s_waits3 + nsends;
2689   }
2690 
2691   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2692   if (reuse == MAT_INITIAL_MATRIX){
2693     /* get new tags to keep the communication clean */
2694     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2695     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2696     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2697 
2698     /* post receives of other's nzlocal */
2699     for (i=0; i<nrecvs; i++){
2700       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2701     }
2702     /* send nzlocal to others */
2703     for (i=0; i<nsends; i++){
2704       sbuf_nz[i] = nzlocal;
2705       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2706     }
2707     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2708     count = nrecvs;
2709     while (count) {
2710       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2711       recv_rank[imdex] = recv_status.MPI_SOURCE;
2712       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2713       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2714 
2715       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2716       rbuf_nz[imdex] += i + 2;
2717       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2718       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2719       count--;
2720     }
2721     /* wait on sends of nzlocal */
2722     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2723     /* send mat->i,j to others, and recv from others */
2724     /*------------------------------------------------*/
2725     for (i=0; i<nsends; i++){
2726       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2727       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2728     }
2729     /* wait on receives of mat->i,j */
2730     /*------------------------------*/
2731     count = nrecvs;
2732     while (count) {
2733       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2734       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2735       count--;
2736     }
2737     /* wait on sends of mat->i,j */
2738     /*---------------------------*/
2739     if (nsends) {
2740       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2741     }
2742   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2743 
2744   /* post receives, send and receive mat->a */
2745   /*----------------------------------------*/
2746   for (imdex=0; imdex<nrecvs; imdex++) {
2747     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2748   }
2749   for (i=0; i<nsends; i++){
2750     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2751   }
2752   count = nrecvs;
2753   while (count) {
2754     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2755     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2756     count--;
2757   }
2758   if (nsends) {
2759     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2760   }
2761 
2762   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2763 
2764   /* create redundant matrix */
2765   /*-------------------------*/
2766   if (reuse == MAT_INITIAL_MATRIX){
2767     /* compute rownz_max for preallocation */
2768     for (imdex=0; imdex<nrecvs; imdex++){
2769       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2770       rptr = rbuf_j[imdex];
2771       for (i=0; i<j; i++){
2772         ncols = rptr[i+1] - rptr[i];
2773         if (rownz_max < ncols) rownz_max = ncols;
2774       }
2775     }
2776 
2777     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2778     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2779     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2780     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2781     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2782     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2783   } else {
2784     C = *matredundant;
2785   }
2786 
2787   /* insert local matrix entries */
2788   rptr = sbuf_j;
2789   cols = sbuf_j + rend-rstart + 1;
2790   vals = sbuf_a;
2791   for (i=0; i<rend-rstart; i++){
2792     row   = i + rstart;
2793     ncols = rptr[i+1] - rptr[i];
2794     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2795     vals += ncols;
2796     cols += ncols;
2797   }
2798   /* insert received matrix entries */
2799   for (imdex=0; imdex<nrecvs; imdex++){
2800     rstart = rowrange[recv_rank[imdex]];
2801     rend   = rowrange[recv_rank[imdex]+1];
2802     rptr = rbuf_j[imdex];
2803     cols = rbuf_j[imdex] + rend-rstart + 1;
2804     vals = rbuf_a[imdex];
2805     for (i=0; i<rend-rstart; i++){
2806       row   = i + rstart;
2807       ncols = rptr[i+1] - rptr[i];
2808       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2809       vals += ncols;
2810       cols += ncols;
2811     }
2812   }
2813   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2814   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2815   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2816   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2817   if (reuse == MAT_INITIAL_MATRIX) {
2818     PetscContainer container;
2819     *matredundant = C;
2820     /* create a supporting struct and attach it to C for reuse */
2821     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2822     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2823     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2824     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2825     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2826     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2827 
2828     redund->nzlocal = nzlocal;
2829     redund->nsends  = nsends;
2830     redund->nrecvs  = nrecvs;
2831     redund->send_rank = send_rank;
2832     redund->recv_rank = recv_rank;
2833     redund->sbuf_nz = sbuf_nz;
2834     redund->rbuf_nz = rbuf_nz;
2835     redund->sbuf_j  = sbuf_j;
2836     redund->sbuf_a  = sbuf_a;
2837     redund->rbuf_j  = rbuf_j;
2838     redund->rbuf_a  = rbuf_a;
2839 
2840     redund->Destroy = C->ops->destroy;
2841     C->ops->destroy = MatDestroy_MatRedundant;
2842   }
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2848 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2849 {
2850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2851   PetscErrorCode ierr;
2852   PetscInt       i,*idxb = 0;
2853   PetscScalar    *va,*vb;
2854   Vec            vtmp;
2855 
2856   PetscFunctionBegin;
2857   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2858   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2859   if (idx) {
2860     for (i=0; i<A->rmap->n; i++) {
2861       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2862     }
2863   }
2864 
2865   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2866   if (idx) {
2867     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2868   }
2869   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2870   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2871 
2872   for (i=0; i<A->rmap->n; i++){
2873     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2874       va[i] = vb[i];
2875       if (idx) idx[i] = a->garray[idxb[i]];
2876     }
2877   }
2878 
2879   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2880   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2881   ierr = PetscFree(idxb);CHKERRQ(ierr);
2882   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 #undef __FUNCT__
2887 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2888 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2889 {
2890   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2891   PetscErrorCode ierr;
2892   PetscInt       i,*idxb = 0;
2893   PetscScalar    *va,*vb;
2894   Vec            vtmp;
2895 
2896   PetscFunctionBegin;
2897   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2898   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2899   if (idx) {
2900     for (i=0; i<A->rmap->n; i++) {
2901       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2902     }
2903   }
2904 
2905   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2906   if (idx) {
2907     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2908   }
2909   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2910   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2911 
2912   for (i=0; i<A->rmap->n; i++){
2913     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2914       va[i] = vb[i];
2915       if (idx) idx[i] = a->garray[idxb[i]];
2916     }
2917   }
2918 
2919   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2920   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2921   ierr = PetscFree(idxb);CHKERRQ(ierr);
2922   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2928 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2929 {
2930   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2931   PetscInt       n      = A->rmap->n;
2932   PetscInt       cstart = A->cmap->rstart;
2933   PetscInt      *cmap   = mat->garray;
2934   PetscInt      *diagIdx, *offdiagIdx;
2935   Vec            diagV, offdiagV;
2936   PetscScalar   *a, *diagA, *offdiagA;
2937   PetscInt       r;
2938   PetscErrorCode ierr;
2939 
2940   PetscFunctionBegin;
2941   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2942   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2943   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2944   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2945   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2946   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2947   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2948   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2949   for (r = 0; r < n; ++r) {
2950     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2951       a[r]   = diagA[r];
2952       idx[r] = cstart + diagIdx[r];
2953     } else {
2954       a[r]   = offdiagA[r];
2955       idx[r] = cmap[offdiagIdx[r]];
2956     }
2957   }
2958   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2959   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2960   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2961   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2962   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2963   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2969 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2970 {
2971   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2972   PetscInt       n      = A->rmap->n;
2973   PetscInt       cstart = A->cmap->rstart;
2974   PetscInt      *cmap   = mat->garray;
2975   PetscInt      *diagIdx, *offdiagIdx;
2976   Vec            diagV, offdiagV;
2977   PetscScalar   *a, *diagA, *offdiagA;
2978   PetscInt       r;
2979   PetscErrorCode ierr;
2980 
2981   PetscFunctionBegin;
2982   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2983   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2984   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2985   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2986   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2987   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2988   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2989   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2990   for (r = 0; r < n; ++r) {
2991     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2992       a[r]   = diagA[r];
2993       idx[r] = cstart + diagIdx[r];
2994     } else {
2995       a[r]   = offdiagA[r];
2996       idx[r] = cmap[offdiagIdx[r]];
2997     }
2998   }
2999   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3000   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3001   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3002   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3003   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3004   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3005   PetscFunctionReturn(0);
3006 }
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3010 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3011 {
3012   PetscErrorCode ierr;
3013   Mat            *dummy;
3014 
3015   PetscFunctionBegin;
3016   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3017   *newmat = *dummy;
3018   ierr = PetscFree(dummy);CHKERRQ(ierr);
3019   PetscFunctionReturn(0);
3020 }
3021 
3022 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3023 
3024 #undef __FUNCT__
3025 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3026 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3027 {
3028   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3029   PetscErrorCode ierr;
3030 
3031   PetscFunctionBegin;
3032   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 #undef __FUNCT__
3037 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3038 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3039 {
3040   PetscErrorCode ierr;
3041   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3042 
3043   PetscFunctionBegin;
3044   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3045   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3046   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3047   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 /* -------------------------------------------------------------------*/
3052 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3053        MatGetRow_MPIAIJ,
3054        MatRestoreRow_MPIAIJ,
3055        MatMult_MPIAIJ,
3056 /* 4*/ MatMultAdd_MPIAIJ,
3057        MatMultTranspose_MPIAIJ,
3058        MatMultTransposeAdd_MPIAIJ,
3059 #ifdef PETSC_HAVE_PBGL
3060        MatSolve_MPIAIJ,
3061 #else
3062        0,
3063 #endif
3064        0,
3065        0,
3066 /*10*/ 0,
3067        0,
3068        0,
3069        MatSOR_MPIAIJ,
3070        MatTranspose_MPIAIJ,
3071 /*15*/ MatGetInfo_MPIAIJ,
3072        MatEqual_MPIAIJ,
3073        MatGetDiagonal_MPIAIJ,
3074        MatDiagonalScale_MPIAIJ,
3075        MatNorm_MPIAIJ,
3076 /*20*/ MatAssemblyBegin_MPIAIJ,
3077        MatAssemblyEnd_MPIAIJ,
3078        MatSetOption_MPIAIJ,
3079        MatZeroEntries_MPIAIJ,
3080 /*24*/ MatZeroRows_MPIAIJ,
3081        0,
3082 #ifdef PETSC_HAVE_PBGL
3083        0,
3084 #else
3085        0,
3086 #endif
3087        0,
3088        0,
3089 /*29*/ MatSetUp_MPIAIJ,
3090 #ifdef PETSC_HAVE_PBGL
3091        0,
3092 #else
3093        0,
3094 #endif
3095        0,
3096        0,
3097        0,
3098 /*34*/ MatDuplicate_MPIAIJ,
3099        0,
3100        0,
3101        0,
3102        0,
3103 /*39*/ MatAXPY_MPIAIJ,
3104        MatGetSubMatrices_MPIAIJ,
3105        MatIncreaseOverlap_MPIAIJ,
3106        MatGetValues_MPIAIJ,
3107        MatCopy_MPIAIJ,
3108 /*44*/ MatGetRowMax_MPIAIJ,
3109        MatScale_MPIAIJ,
3110        0,
3111        0,
3112        MatZeroRowsColumns_MPIAIJ,
3113 /*49*/ MatSetRandom_MPIAIJ,
3114        0,
3115        0,
3116        0,
3117        0,
3118 /*54*/ MatFDColoringCreate_MPIAIJ,
3119        0,
3120        MatSetUnfactored_MPIAIJ,
3121        0, /* MatPermute_MPIAIJ, impl currently broken */
3122        0,
3123 /*59*/ MatGetSubMatrix_MPIAIJ,
3124        MatDestroy_MPIAIJ,
3125        MatView_MPIAIJ,
3126        0,
3127        0,
3128 /*64*/ 0,
3129        0,
3130        0,
3131        0,
3132        0,
3133 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3134        MatGetRowMinAbs_MPIAIJ,
3135        0,
3136        MatSetColoring_MPIAIJ,
3137 #if defined(PETSC_HAVE_ADIC)
3138        MatSetValuesAdic_MPIAIJ,
3139 #else
3140        0,
3141 #endif
3142        MatSetValuesAdifor_MPIAIJ,
3143 /*75*/ MatFDColoringApply_AIJ,
3144        0,
3145        0,
3146        0,
3147        MatFindZeroDiagonals_MPIAIJ,
3148 /*80*/ 0,
3149        0,
3150        0,
3151 /*83*/ MatLoad_MPIAIJ,
3152        0,
3153        0,
3154        0,
3155        0,
3156        0,
3157 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3158        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3159        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3160        MatPtAP_Basic,
3161        MatPtAPSymbolic_MPIAIJ,
3162 /*94*/ MatPtAPNumeric_MPIAIJ,
3163        0,
3164        0,
3165        0,
3166        0,
3167 /*99*/ 0,
3168        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3169        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3170        MatConjugate_MPIAIJ,
3171        0,
3172 /*104*/MatSetValuesRow_MPIAIJ,
3173        MatRealPart_MPIAIJ,
3174        MatImaginaryPart_MPIAIJ,
3175        0,
3176        0,
3177 /*109*/0,
3178        MatGetRedundantMatrix_MPIAIJ,
3179        MatGetRowMin_MPIAIJ,
3180        0,
3181        0,
3182 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3183        0,
3184        0,
3185        0,
3186        0,
3187 /*119*/0,
3188        0,
3189        0,
3190        0,
3191        MatGetMultiProcBlock_MPIAIJ,
3192 /*124*/MatFindNonzeroRows_MPIAIJ,
3193        MatGetColumnNorms_MPIAIJ,
3194        MatInvertBlockDiagonal_MPIAIJ,
3195        0,
3196        MatGetSubMatricesParallel_MPIAIJ,
3197 /*129*/0,
3198        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3199        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3200        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3201        0,
3202 /*134*/0,
3203        0,
3204        0,
3205        0,
3206        0
3207 };
3208 
3209 /* ----------------------------------------------------------------------------------------*/
3210 
3211 EXTERN_C_BEGIN
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3214 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3215 {
3216   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3217   PetscErrorCode ierr;
3218 
3219   PetscFunctionBegin;
3220   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3221   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
3224 EXTERN_C_END
3225 
3226 EXTERN_C_BEGIN
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3229 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3230 {
3231   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3232   PetscErrorCode ierr;
3233 
3234   PetscFunctionBegin;
3235   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3236   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3237   PetscFunctionReturn(0);
3238 }
3239 EXTERN_C_END
3240 
3241 EXTERN_C_BEGIN
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3244 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3245 {
3246   Mat_MPIAIJ     *b;
3247   PetscErrorCode ierr;
3248   PetscInt       i;
3249   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3250 
3251   PetscFunctionBegin;
3252   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3253   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3254   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3255   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3256   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3257   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3258 
3259   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3260   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3261   if (d_nnz) {
3262     for (i=0; i<B->rmap->n; i++) {
3263       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3264     }
3265   }
3266   if (o_nnz) {
3267     for (i=0; i<B->rmap->n; i++) {
3268       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3269     }
3270   }
3271   b = (Mat_MPIAIJ*)B->data;
3272 
3273   if (!B->preallocated) {
3274     /* Explicitly create 2 MATSEQAIJ matrices. */
3275     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3276     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3277     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3278     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3279     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3280     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3281     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3282     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3283     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3284     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3285   }
3286 
3287   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3288   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3289   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3290   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3291   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3292   B->preallocated = PETSC_TRUE;
3293   PetscFunctionReturn(0);
3294 }
3295 EXTERN_C_END
3296 
3297 #undef __FUNCT__
3298 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3299 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3300 {
3301   Mat            mat;
3302   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3303   PetscErrorCode ierr;
3304 
3305   PetscFunctionBegin;
3306   *newmat       = 0;
3307   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3308   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3309   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3310   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3311   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3312   a    = (Mat_MPIAIJ*)mat->data;
3313 
3314   mat->factortype    = matin->factortype;
3315   mat->rmap->bs      = matin->rmap->bs;
3316   mat->cmap->bs      = matin->cmap->bs;
3317   mat->assembled    = PETSC_TRUE;
3318   mat->insertmode   = NOT_SET_VALUES;
3319   mat->preallocated = PETSC_TRUE;
3320 
3321   a->size           = oldmat->size;
3322   a->rank           = oldmat->rank;
3323   a->donotstash     = oldmat->donotstash;
3324   a->roworiented    = oldmat->roworiented;
3325   a->rowindices     = 0;
3326   a->rowvalues      = 0;
3327   a->getrowactive   = PETSC_FALSE;
3328 
3329   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3330   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3331 
3332   if (oldmat->colmap) {
3333 #if defined (PETSC_USE_CTABLE)
3334     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3335 #else
3336     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3337     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3338     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3339 #endif
3340   } else a->colmap = 0;
3341   if (oldmat->garray) {
3342     PetscInt len;
3343     len  = oldmat->B->cmap->n;
3344     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3345     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3346     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3347   } else a->garray = 0;
3348 
3349   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3350   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3351   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3352   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3353   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3354   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3355   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3356   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3357   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3358   *newmat = mat;
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 
3363 
3364 #undef __FUNCT__
3365 #define __FUNCT__ "MatLoad_MPIAIJ"
3366 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3367 {
3368   PetscScalar    *vals,*svals;
3369   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3370   PetscErrorCode ierr;
3371   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3372   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3373   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3374   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3375   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3376   int            fd;
3377   PetscInt       bs = 1;
3378 
3379   PetscFunctionBegin;
3380   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3381   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3382   if (!rank) {
3383     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3384     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3385     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3386   }
3387 
3388   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3389   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3390   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3391 
3392   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3393 
3394   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3395   M = header[1]; N = header[2];
3396   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3397   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3398   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3399 
3400   /* If global sizes are set, check if they are consistent with that given in the file */
3401   if (sizesset) {
3402     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3403   }
3404   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3405   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3406 
3407   /* determine ownership of all (block) rows */
3408   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3409   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3410   else m = newMat->rmap->n; /* Set by user */
3411 
3412   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3413   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3414 
3415   /* First process needs enough room for process with most rows */
3416   if (!rank) {
3417     mmax = rowners[1];
3418     for (i=2; i<=size; i++) {
3419       mmax = PetscMax(mmax, rowners[i]);
3420     }
3421   } else mmax = m;
3422 
3423   rowners[0] = 0;
3424   for (i=2; i<=size; i++) {
3425     rowners[i] += rowners[i-1];
3426   }
3427   rstart = rowners[rank];
3428   rend   = rowners[rank+1];
3429 
3430   /* distribute row lengths to all processors */
3431   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3432   if (!rank) {
3433     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3434     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3435     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3436     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3437     for (j=0; j<m; j++) {
3438       procsnz[0] += ourlens[j];
3439     }
3440     for (i=1; i<size; i++) {
3441       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3442       /* calculate the number of nonzeros on each processor */
3443       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3444         procsnz[i] += rowlengths[j];
3445       }
3446       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3447     }
3448     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3449   } else {
3450     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3451   }
3452 
3453   if (!rank) {
3454     /* determine max buffer needed and allocate it */
3455     maxnz = 0;
3456     for (i=0; i<size; i++) {
3457       maxnz = PetscMax(maxnz,procsnz[i]);
3458     }
3459     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3460 
3461     /* read in my part of the matrix column indices  */
3462     nz   = procsnz[0];
3463     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3464     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3465 
3466     /* read in everyone else's and ship off */
3467     for (i=1; i<size; i++) {
3468       nz     = procsnz[i];
3469       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3470       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3471     }
3472     ierr = PetscFree(cols);CHKERRQ(ierr);
3473   } else {
3474     /* determine buffer space needed for message */
3475     nz = 0;
3476     for (i=0; i<m; i++) {
3477       nz += ourlens[i];
3478     }
3479     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3480 
3481     /* receive message of column indices*/
3482     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3483   }
3484 
3485   /* determine column ownership if matrix is not square */
3486   if (N != M) {
3487     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3488     else n = newMat->cmap->n;
3489     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3490     cstart = cend - n;
3491   } else {
3492     cstart = rstart;
3493     cend   = rend;
3494     n      = cend - cstart;
3495   }
3496 
3497   /* loop over local rows, determining number of off diagonal entries */
3498   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3499   jj = 0;
3500   for (i=0; i<m; i++) {
3501     for (j=0; j<ourlens[i]; j++) {
3502       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3503       jj++;
3504     }
3505   }
3506 
3507   for (i=0; i<m; i++) {
3508     ourlens[i] -= offlens[i];
3509   }
3510   if (!sizesset) {
3511     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3512   }
3513 
3514   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3515 
3516   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3517 
3518   for (i=0; i<m; i++) {
3519     ourlens[i] += offlens[i];
3520   }
3521 
3522   if (!rank) {
3523     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3524 
3525     /* read in my part of the matrix numerical values  */
3526     nz   = procsnz[0];
3527     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3528 
3529     /* insert into matrix */
3530     jj      = rstart;
3531     smycols = mycols;
3532     svals   = vals;
3533     for (i=0; i<m; i++) {
3534       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3535       smycols += ourlens[i];
3536       svals   += ourlens[i];
3537       jj++;
3538     }
3539 
3540     /* read in other processors and ship out */
3541     for (i=1; i<size; i++) {
3542       nz     = procsnz[i];
3543       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3544       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3545     }
3546     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3547   } else {
3548     /* receive numeric values */
3549     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3550 
3551     /* receive message of values*/
3552     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3553 
3554     /* insert into matrix */
3555     jj      = rstart;
3556     smycols = mycols;
3557     svals   = vals;
3558     for (i=0; i<m; i++) {
3559       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3560       smycols += ourlens[i];
3561       svals   += ourlens[i];
3562       jj++;
3563     }
3564   }
3565   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3566   ierr = PetscFree(vals);CHKERRQ(ierr);
3567   ierr = PetscFree(mycols);CHKERRQ(ierr);
3568   ierr = PetscFree(rowners);CHKERRQ(ierr);
3569   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3570   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3571 
3572   PetscFunctionReturn(0);
3573 }
3574 
3575 #undef __FUNCT__
3576 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3577 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3578 {
3579   PetscErrorCode ierr;
3580   IS             iscol_local;
3581   PetscInt       csize;
3582 
3583   PetscFunctionBegin;
3584   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3585   if (call == MAT_REUSE_MATRIX) {
3586     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3587     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3588   } else {
3589     PetscInt cbs;
3590     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3591     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3592     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3593   }
3594   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3595   if (call == MAT_INITIAL_MATRIX) {
3596     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3597     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3598   }
3599   PetscFunctionReturn(0);
3600 }
3601 
3602 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3603 #undef __FUNCT__
3604 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3605 /*
3606     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3607   on each process, and then the final result obtained by concatenating the local
3608   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3609 
3610   Note: This requires a sequential iscol with all indices.
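
  A minimal sketch of the expected calling pattern, mirroring MatGetSubMatrix_MPIAIJ()
  above (the name iscol_local is illustrative):

      ISAllGather(iscol,&iscol_local);
      MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,MAT_INITIAL_MATRIX,&newmat);
      ISDestroy(&iscol_local);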
3611 */
3612 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3613 {
3614   PetscErrorCode ierr;
3615   PetscMPIInt    rank,size;
3616   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3617   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3618   PetscBool      allcolumns, colflag;
3619   Mat            M,Mreuse;
3620   MatScalar      *vwork,*aa;
3621   MPI_Comm       comm = ((PetscObject)mat)->comm;
3622   Mat_SeqAIJ     *aij;
3623 
3624 
3625   PetscFunctionBegin;
3626   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3628 
3629   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3630   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3631   if (colflag && ncol == mat->cmap->N){
3632     allcolumns = PETSC_TRUE;
3633   } else {
3634     allcolumns = PETSC_FALSE;
3635   }
3636   if (call ==  MAT_REUSE_MATRIX) {
3637     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3638     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3639     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3640   } else {
3641     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3642   }
3643 
3644   /*
3645       m - number of local rows
3646       n - number of columns (same on all processors)
3647       rstart - first row in new global matrix generated
3648   */
3649   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3650   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3651   if (call == MAT_INITIAL_MATRIX) {
3652     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3653     ii  = aij->i;
3654     jj  = aij->j;
3655 
3656     /*
3657         Determine the number of non-zeros in the diagonal and off-diagonal
3658         portions of the matrix in order to do correct preallocation
3659     */
3660 
3661     /* first get start and end of "diagonal" columns */
3662     if (csize == PETSC_DECIDE) {
3663       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3664       if (mglobal == n) { /* square matrix */
3665 	nlocal = m;
3666       } else {
3667         nlocal = n/size + ((n % size) > rank);
3668       }
3669     } else {
3670       nlocal = csize;
3671     }
3672     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3673     rstart = rend - nlocal;
3674     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3675 
3676     /* next, compute all the lengths */
3677     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3678     olens = dlens + m;
3679     for (i=0; i<m; i++) {
3680       jend = ii[i+1] - ii[i];
3681       olen = 0;
3682       dlen = 0;
3683       for (j=0; j<jend; j++) {
3684         if (*jj < rstart || *jj >= rend) olen++;
3685         else dlen++;
3686         jj++;
3687       }
3688       olens[i] = olen;
3689       dlens[i] = dlen;
3690     }
3691     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3692     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3693     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3694     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3695     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3696     ierr = PetscFree(dlens);CHKERRQ(ierr);
3697   } else {
3698     PetscInt ml,nl;
3699 
3700     M = *newmat;
3701     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3702     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3703     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3704     /*
3705          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3706        rather than the slower MatSetValues().
3707     */
3708     M->was_assembled = PETSC_TRUE;
3709     M->assembled     = PETSC_FALSE;
3710   }
3711   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3712   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3713   ii  = aij->i;
3714   jj  = aij->j;
3715   aa  = aij->a;
3716   for (i=0; i<m; i++) {
3717     row   = rstart + i;
3718     nz    = ii[i+1] - ii[i];
3719     cwork = jj;     jj += nz;
3720     vwork = aa;     aa += nz;
3721     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3722   }
3723 
3724   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3725   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3726   *newmat = M;
3727 
3728   /* save submatrix used in processor for next request */
3729   if (call ==  MAT_INITIAL_MATRIX) {
3730     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3731     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3732   }
3733 
3734   PetscFunctionReturn(0);
3735 }
3736 
3737 EXTERN_C_BEGIN
3738 #undef __FUNCT__
3739 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3740 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3741 {
3742   PetscInt       m,cstart, cend,j,nnz,i,d;
3743   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3744   const PetscInt *JJ;
3745   PetscScalar    *values;
3746   PetscErrorCode ierr;
3747 
3748   PetscFunctionBegin;
3749   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3750 
3751   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3752   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3753   m      = B->rmap->n;
3754   cstart = B->cmap->rstart;
3755   cend   = B->cmap->rend;
3756   rstart = B->rmap->rstart;
3757 
3758   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3759 
3760 #if defined(PETSC_USE_DEBUG)
3761   for (i=0; i<m; i++) {
3762     nnz     = Ii[i+1]- Ii[i];
3763     JJ      = J + Ii[i];
3764     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3765     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3766     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3767   }
3768 #endif
3769 
3770   for (i=0; i<m; i++) {
3771     nnz     = Ii[i+1]- Ii[i];
3772     JJ      = J + Ii[i];
3773     nnz_max = PetscMax(nnz_max,nnz);
3774     d       = 0;
3775     for (j=0; j<nnz; j++) {
3776       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3777     }
3778     d_nnz[i] = d;
3779     o_nnz[i] = nnz - d;
3780   }
3781   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3782   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3783 
3784   if (v) values = (PetscScalar*)v;
3785   else {
3786     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3787     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3788   }
3789 
3790   for (i=0; i<m; i++) {
3791     ii   = i + rstart;
3792     nnz  = Ii[i+1]- Ii[i];
3793     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3794   }
3795   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3796   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3797 
3798   if (!v) {
3799     ierr = PetscFree(values);CHKERRQ(ierr);
3800   }
3801   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3802   PetscFunctionReturn(0);
3803 }
3804 EXTERN_C_END
3805 
3806 #undef __FUNCT__
3807 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3808 /*@
3809    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3810    (the default parallel PETSc format).
3811 
3812    Collective on MPI_Comm
3813 
3814    Input Parameters:
3815 +  B - the matrix
3816 .  i - the indices into j for the start of each local row (starts with zero)
3817 .  j - the column indices for each local row (starts with zero)
3818 -  v - optional values in the matrix
3819 
3820    Level: developer
3821 
3822    Notes:
3823        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3824      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3825      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3826 
3827        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3828 
3829        The format used for the sparse matrix input is equivalent to a
3830     row-major ordering, i.e. for the following matrix, the input data expected is
3831     as shown below:
3832 
3833         1 0 0
3834         2 0 3     P0
3835        -------
3836         4 5 6     P1
3837 
3838      Process0 [P0]: rows_owned=[0,1]
3839         i =  {0,1,3}  [size = nrow+1  = 2+1]
3840         j =  {0,0,2}  [size = 3]
3841         v =  {1,2,3}  [size = 3]
3842 
3843      Process1 [P1]: rows_owned=[2]
3844         i =  {0,3}    [size = nrow+1  = 1+1]
3845         j =  {0,1,2}  [size = 3]
3846         v =  {4,5,6}  [size = 3]
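
     A minimal call sketch for process P0 in the example above (illustrative only;
     the communicator comm is an assumption of this sketch, and P0 owns two of the
     three global rows):
.vb
        Mat         B;
        PetscInt    i[] = {0,1,3},j[] = {0,0,2};
        PetscScalar v[] = {1.0,2.0,3.0};

        MatCreate(comm,&B);
        MatSetSizes(B,2,PETSC_DECIDE,3,3);
        MatSetType(B,MATMPIAIJ);
        MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve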
3847 
3848 .keywords: matrix, aij, compressed row, sparse, parallel
3849 
3850 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3851           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3852 @*/
3853 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3854 {
3855   PetscErrorCode ierr;
3856 
3857   PetscFunctionBegin;
3858   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3859   PetscFunctionReturn(0);
3860 }
3861 
3862 #undef __FUNCT__
3863 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3864 /*@C
3865    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3866    (the default parallel PETSc format).  For good matrix assembly performance
3867    the user should preallocate the matrix storage by setting the parameters
3868    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3869    performance can be increased by more than a factor of 50.
3870 
3871    Collective on MPI_Comm
3872 
3873    Input Parameters:
3874 +  A - the matrix
3875 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3876            (same value is used for all local rows)
3877 .  d_nnz - array containing the number of nonzeros in the various rows of the
3878            DIAGONAL portion of the local submatrix (possibly different for each row)
3879            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3880            The size of this array is equal to the number of local rows, i.e 'm'.
3881            For matrices that will be factored, you must leave room for (and set)
3882            the diagonal entry even if it is zero.
3883 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3884            submatrix (same value is used for all local rows).
3885 -  o_nnz - array containing the number of nonzeros in the various rows of the
3886            OFF-DIAGONAL portion of the local submatrix (possibly different for
3887            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3888            structure. The size of this array is equal to the number
3889            of local rows, i.e 'm'.
3890 
3891    If the *_nnz parameter is given then the *_nz parameter is ignored
3892 
3893    The AIJ format (also called the Yale sparse matrix format or
3894    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3895    storage.  The stored row and column indices begin with zero.
3896    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3897 
3898    The parallel matrix is partitioned such that the first m0 rows belong to
3899    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3900    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3901 
3902    The DIAGONAL portion of the local submatrix of a processor can be defined
3903    as the submatrix obtained by extracting the part corresponding to
3904    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3905    first row that belongs to the processor, r2 is the last row belonging to
3906    this processor, and c1-c2 is the range of indices of the local part of a
3907    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3908    common case of a square matrix, the row and column ranges are the same and
3909    the DIAGONAL part is also square. The remaining portion of the local
3910    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3911 
3912    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3913 
3914    You can call MatGetInfo() to get information on how effective the preallocation was;
3915    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3916    You can also run with the option -info and look for messages with the string
3917    malloc in them to see if additional memory allocation was needed.
3918 
3919    Example usage:
3920 
3921    Consider the following 8x8 matrix with 34 non-zero values that is
3922    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3923    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3924    as follows:
3925 
3926 .vb
3927             1  2  0  |  0  3  0  |  0  4
3928     Proc0   0  5  6  |  7  0  0  |  8  0
3929             9  0 10  | 11  0  0  | 12  0
3930     -------------------------------------
3931            13  0 14  | 15 16 17  |  0  0
3932     Proc1   0 18  0  | 19 20 21  |  0  0
3933             0  0  0  | 22 23  0  | 24  0
3934     -------------------------------------
3935     Proc2  25 26 27  |  0  0 28  | 29  0
3936            30  0  0  | 31 32 33  |  0 34
3937 .ve
3938 
3939    This can be represented as a collection of submatrices as:
3940 
3941 .vb
3942       A B C
3943       D E F
3944       G H I
3945 .ve
3946 
3947    Where the submatrices A,B,C are owned by proc0, D,E,F are
3948    owned by proc1, G,H,I are owned by proc2.
3949 
3950    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3951    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3952    The 'M','N' parameters are 8,8, and have the same values on all procs.
3953 
3954    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3955    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3956    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3957    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3958    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3959    matrix, and [DF] as another SeqAIJ matrix.
3960 
3961    When d_nz, o_nz parameters are specified, d_nz storage elements are
3962    allocated for every row of the local diagonal submatrix, and o_nz
3963    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3964    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3965    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3966    In this case, the values of d_nz,o_nz are:
3967 .vb
3968      proc0 : d_nz = 2, o_nz = 2
3969      proc1 : d_nz = 3, o_nz = 2
3970      proc2 : d_nz = 1, o_nz = 4
3971 .ve
3972    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3973    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3974    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3975    34 values.
3976 
3977    When d_nnz, o_nnz parameters are specified, the storage is specified
3978    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3979    In the above case the values for d_nnz,o_nnz are:
3980 .vb
3981      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3982      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3983      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3984 .ve
3985    Here the space allocated is the sum of all the above values, i.e. 34, and
3986    hence the preallocation is perfect.
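
   For proc0 in the example above the corresponding call is sketched below
   (illustrative only; it assumes A has already been created with MatCreate(),
   MatSetSizes() and MatSetType(A,MATMPIAIJ)):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve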
3987 
3988    Level: intermediate
3989 
3990 .keywords: matrix, aij, compressed row, sparse, parallel
3991 
3992 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3993           MPIAIJ, MatGetInfo()
3994 @*/
3995 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3996 {
3997   PetscErrorCode ierr;
3998 
3999   PetscFunctionBegin;
4000   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4001   PetscValidType(B,1);
4002   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4003   PetscFunctionReturn(0);
4004 }
4005 
4006 #undef __FUNCT__
4007 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4008 /*@
4009      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4010          rows in standard CSR format.
4011 
4012    Collective on MPI_Comm
4013 
4014    Input Parameters:
4015 +  comm - MPI communicator
4016 .  m - number of local rows (Cannot be PETSC_DECIDE)
4017 .  n - This value should be the same as the local size used in creating the
4018        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4019        calculated if N is given). For square matrices n is almost always m.
4020 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4021 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4022 .   i - row indices
4023 .   j - column indices
4024 -   a - matrix values
4025 
4026    Output Parameter:
4027 .   mat - the matrix
4028 
4029    Level: intermediate
4030 
4031    Notes:
4032        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4033      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4034      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4035 
4036        The i and j indices are 0-based, and the i indices are offsets into the local j array.
4037 
4038        The format used for the sparse matrix input is equivalent to a
4039     row-major ordering, i.e., for the following matrix, the expected input data is
4040     as shown:
4041 
4042         1 0 0
4043         2 0 3     P0
4044        -------
4045         4 5 6     P1
4046 
4047      Process0 [P0]: rows_owned=[0,1]
4048         i =  {0,1,3}  [size = nrow+1  = 2+1]
4049         j =  {0,0,2}  [size = nz = 3]
4050         v =  {1,2,3}  [size = nz = 3]
4051 
4052      Process1 [P1]: rows_owned=[2]
4053         i =  {0,3}    [size = nrow+1  = 1+1]
4054         j =  {0,1,2}  [size = nz = 3]
4055         v =  {4,5,6}  [size = nz = 3]
4056 
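     A sketch of the call P0 above might make (names are illustrative; error
     checking is omitted):
.vb
     PetscInt    i[] = {0,1,3};   /* row pointers for the 2 local rows */
     PetscInt    j[] = {0,0,2};   /* column indices */
     PetscScalar v[] = {1,2,3};   /* values */
     Mat         A;

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
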
4057 .keywords: matrix, aij, compressed row, sparse, parallel
4058 
4059 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4060           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4061 @*/
4062 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4063 {
4064   PetscErrorCode ierr;
4065 
4066   PetscFunctionBegin;
4067   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4068   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4069   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4070   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4071   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4072   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4073   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4074   PetscFunctionReturn(0);
4075 }
4076 
4077 #undef __FUNCT__
4078 #define __FUNCT__ "MatCreateAIJ"
4079 /*@C
4080    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4081    (the default parallel PETSc format).  For good matrix assembly performance
4082    the user should preallocate the matrix storage by setting the parameters
4083    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4084    performance can be increased by more than a factor of 50.
4085 
4086    Collective on MPI_Comm
4087 
4088    Input Parameters:
4089 +  comm - MPI communicator
4090 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4091            This value should be the same as the local size used in creating the
4092            y vector for the matrix-vector product y = Ax.
4093 .  n - This value should be the same as the local size used in creating the
4094        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4095        calculated if N is given). For square matrices n is almost always m.
4096 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4097 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4098 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4099            (same value is used for all local rows)
4100 .  d_nnz - array containing the number of nonzeros in the various rows of the
4101            DIAGONAL portion of the local submatrix (possibly different for each row)
4102            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4103            The size of this array is equal to the number of local rows, i.e 'm'.
4104 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4105            submatrix (same value is used for all local rows).
4106 -  o_nnz - array containing the number of nonzeros in the various rows of the
4107            OFF-DIAGONAL portion of the local submatrix (possibly different for
4108            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4109            structure. The size of this array is equal to the number
4110            of local rows, i.e 'm'.
4111 
4112    Output Parameter:
4113 .  A - the matrix
4114 
4115    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4116    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4117    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4118 
4119    Notes:
4120    If the *_nnz parameter is given then the *_nz parameter is ignored
4121 
4122    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4123    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4124    storage requirements for this matrix.
4125 
4126    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4127    processor then it must be used on all processors that share the object for
4128    that argument.
4129 
4130    The user MUST specify either the local or global matrix dimensions
4131    (possibly both).
4132 
4133    The parallel matrix is partitioned across processors such that the
4134    first m0 rows belong to process 0, the next m1 rows belong to
4135    process 1, the next m2 rows belong to process 2, etc., where
4136    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4137    values corresponding to an [m x N] submatrix.
4138 
4139    The columns are logically partitioned with the n0 columns belonging
4140    to the 0th partition, the next n1 columns belonging to the next
4141    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4142 
4143    The DIAGONAL portion of the local submatrix on any given processor
4144    is the submatrix corresponding to the rows and columns m,n
4145    owned by the given processor, i.e., the diagonal matrix on
4146    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4147    etc. The remaining portion of the local submatrix [m x (N-n)]
4148    constitutes the OFF-DIAGONAL portion. The example below better
4149    illustrates this concept.
4150 
4151    For a square global matrix we define each processor's diagonal portion
4152    to be its local rows and the corresponding columns (a square submatrix);
4153    each processor's off-diagonal portion encompasses the remainder of the
4154    local matrix (a rectangular submatrix).
4155 
4156    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4157 
4158    When calling this routine with a single process communicator, a matrix of
4159    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4160    type of communicator, use the construction mechanism:
4161      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4162 
4163    By default, this format uses inodes (identical nodes) when possible.
4164    We search for consecutive rows with the same nonzero structure, thereby
4165    reusing matrix information to achieve increased efficiency.
4166 
4167    Options Database Keys:
4168 +  -mat_no_inode  - Do not use inodes
4169 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4170 -  -mat_aij_oneindex - Internally use indexing starting at 1
4171         rather than 0.  Note that when calling MatSetValues(),
4172         the user still MUST index entries starting at 0!
4173 
4174 
4175    Example usage:
4176 
4177    Consider the following 8x8 matrix with 34 nonzero values that is
4178    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4179    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4180    as follows:
4181 
4182 .vb
4183             1  2  0  |  0  3  0  |  0  4
4184     Proc0   0  5  6  |  7  0  0  |  8  0
4185             9  0 10  | 11  0  0  | 12  0
4186     -------------------------------------
4187            13  0 14  | 15 16 17  |  0  0
4188     Proc1   0 18  0  | 19 20 21  |  0  0
4189             0  0  0  | 22 23  0  | 24  0
4190     -------------------------------------
4191     Proc2  25 26 27  |  0  0 28  | 29  0
4192            30  0  0  | 31 32 33  |  0 34
4193 .ve
4194 
4195    This can be represented as a collection of submatrices as:
4196 
4197 .vb
4198       A B C
4199       D E F
4200       G H I
4201 .ve
4202 
4203    The submatrices A,B,C are owned by proc0, D,E,F are
4204    owned by proc1, and G,H,I are owned by proc2.
4205 
4206    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4207    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4208    The 'M','N' parameters are 8,8, and have the same values on all procs.
4209 
4210    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4211    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4212    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4213    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4214    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4215    matrix, and [DF] as another SeqAIJ matrix.
4216 
4217    When d_nz, o_nz parameters are specified, d_nz storage elements are
4218    allocated for every row of the local diagonal submatrix, and o_nz
4219    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4220    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4221    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4222    In this case, the values of d_nz,o_nz are:
4223 .vb
4224      proc0 : d_nz = 2, o_nz = 2
4225      proc1 : d_nz = 3, o_nz = 2
4226      proc2 : d_nz = 1, o_nz = 4
4227 .ve
4228    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4229    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4230    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4231    34 values.
4232 
4233    When the d_nnz, o_nnz parameters are specified, the storage is specified
4234    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4235    In the above case the values for d_nnz,o_nnz are:
4236 .vb
4237      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4238      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4239      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4240 .ve
4241    Here the space allocated is the sum of all the above values, i.e., 34, and
4242    hence the preallocation is perfect.
4243 
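   For instance, with the layout above, proc1 might create the matrix with a single
   call (a sketch; names are illustrative and error checking is omitted):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};  /* values from the example above */

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
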
4244    Level: intermediate
4245 
4246 .keywords: matrix, aij, compressed row, sparse, parallel
4247 
4248 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4249           MPIAIJ, MatCreateMPIAIJWithArrays()
4250 @*/
4251 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4252 {
4253   PetscErrorCode ierr;
4254   PetscMPIInt    size;
4255 
4256   PetscFunctionBegin;
4257   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4258   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4259   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4260   if (size > 1) {
4261     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4262     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4263   } else {
4264     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4265     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4266   }
4267   PetscFunctionReturn(0);
4268 }
4269 
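/*
   MatMPIAIJGetSeqAIJ - returns the local diagonal block, the local off-diagonal block,
   and the global column indices corresponding to the columns of the off-diagonal block
   of an MPIAIJ matrix.  A usage sketch (error checking omitted):

     Mat      Ad,Ao;
     PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
*/
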
4270 #undef __FUNCT__
4271 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4272 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4273 {
4274   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4275 
4276   PetscFunctionBegin;
4277   *Ad     = a->A;
4278   *Ao     = a->B;
4279   *colmap = a->garray;
4280   PetscFunctionReturn(0);
4281 }
4282 
4283 #undef __FUNCT__
4284 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4285 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4286 {
4287   PetscErrorCode ierr;
4288   PetscInt       i;
4289   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4290 
4291   PetscFunctionBegin;
4292   if (coloring->ctype == IS_COLORING_GLOBAL) {
4293     ISColoringValue *allcolors,*colors;
4294     ISColoring      ocoloring;
4295 
4296     /* set coloring for diagonal portion */
4297     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4298 
4299     /* set coloring for off-diagonal portion */
4300     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4301     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4302     for (i=0; i<a->B->cmap->n; i++) {
4303       colors[i] = allcolors[a->garray[i]];
4304     }
4305     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4306     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4307     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4308     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4309   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4310     ISColoringValue *colors;
4311     PetscInt        *larray;
4312     ISColoring      ocoloring;
4313 
4314     /* set coloring for diagonal portion */
4315     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4316     for (i=0; i<a->A->cmap->n; i++) {
4317       larray[i] = i + A->cmap->rstart;
4318     }
4319     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4320     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4321     for (i=0; i<a->A->cmap->n; i++) {
4322       colors[i] = coloring->colors[larray[i]];
4323     }
4324     ierr = PetscFree(larray);CHKERRQ(ierr);
4325     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4326     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4327     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4328 
4329     /* set coloring for off-diagonal portion */
4330     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4331     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4332     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4333     for (i=0; i<a->B->cmap->n; i++) {
4334       colors[i] = coloring->colors[larray[i]];
4335     }
4336     ierr = PetscFree(larray);CHKERRQ(ierr);
4337     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4338     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4339     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4340   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4341 
4342   PetscFunctionReturn(0);
4343 }
4344 
4345 #if defined(PETSC_HAVE_ADIC)
4346 #undef __FUNCT__
4347 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4348 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4349 {
4350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4351   PetscErrorCode ierr;
4352 
4353   PetscFunctionBegin;
4354   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4355   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4356   PetscFunctionReturn(0);
4357 }
4358 #endif
4359 
4360 #undef __FUNCT__
4361 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4362 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4363 {
4364   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4365   PetscErrorCode ierr;
4366 
4367   PetscFunctionBegin;
4368   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4369   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4370   PetscFunctionReturn(0);
4371 }
4372 
4373 #undef __FUNCT__
4374 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4375 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4376 {
4377   PetscErrorCode ierr;
4378   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4379   PetscInt       *indx;
4380 
4381   PetscFunctionBegin;
4382   /* This routine will ONLY return MPIAIJ type matrix */
4383   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4384   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4385   if (n == PETSC_DECIDE){
4386     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4387   }
4388   /* Check sum(n) = N */
4389   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4390   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4391 
4392   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4393   rstart -= m;
4394 
4395   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4396   for (i=0;i<m;i++) {
4397     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4398     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4399     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4400   }
4401 
4402   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4403   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4404   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4405   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4406   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4407   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4408   PetscFunctionReturn(0);
4409 }
4410 
4411 #undef __FUNCT__
4412 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4413 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4414 {
4415   PetscErrorCode ierr;
4416   PetscInt       m,N,i,rstart,nnz,Ii;
4417   PetscInt       *indx;
4418   PetscScalar    *values;
4419 
4420   PetscFunctionBegin;
4421   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4422   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4423   for (i=0;i<m;i++) {
4424     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4425     Ii    = i + rstart;
4426     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4427     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4428   }
4429   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4430   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4431   PetscFunctionReturn(0);
4432 }
4433 
4434 #undef __FUNCT__
4435 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4436 /*@
4437       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4438                  matrices from each processor
4439 
4440     Collective on MPI_Comm
4441 
4442    Input Parameters:
4443 +    comm - the communicator the parallel matrix will live on
4444 .    inmat - the input sequential matrix on each process
4445 .    n - number of local columns (or PETSC_DECIDE)
4446 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4447 
4448    Output Parameter:
4449 .    outmat - the parallel matrix generated
4450 
4451     Level: advanced
4452 
4453    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4454 
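   A usage sketch (assuming each process already holds a SeqAIJ matrix A_seq with
   the same number of columns; error checking omitted):
.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
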
4455 @*/
4456 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4457 {
4458   PetscErrorCode ierr;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4462   if (scall == MAT_INITIAL_MATRIX){
4463     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4464   }
4465   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4466   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4467   PetscFunctionReturn(0);
4468 }
4469 
4470 #undef __FUNCT__
4471 #define __FUNCT__ "MatFileSplit"
4472 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4473 {
4474   PetscErrorCode    ierr;
4475   PetscMPIInt       rank;
4476   PetscInt          m,N,i,rstart,nnz;
4477   size_t            len;
4478   const PetscInt    *indx;
4479   PetscViewer       out;
4480   char              *name;
4481   Mat               B;
4482   const PetscScalar *values;
4483 
4484   PetscFunctionBegin;
4485   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4486   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4487   /* Should this be the type of the diagonal block of A? */
4488   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4489   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4490   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4491   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4492   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4493   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4494   for (i=0;i<m;i++) {
4495     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4496     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4497     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4498   }
4499   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4500   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501 
4502   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4503   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4504   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4505   sprintf(name,"%s.%d",outfile,rank);
4506   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4507   ierr = PetscFree(name);CHKERRQ(ierr);
4508   ierr = MatView(B,out);CHKERRQ(ierr);
4509   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4510   ierr = MatDestroy(&B);CHKERRQ(ierr);
4511   PetscFunctionReturn(0);
4512 }
4513 
4514 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4515 #undef __FUNCT__
4516 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4517 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4518 {
4519   PetscErrorCode       ierr;
4520   Mat_Merge_SeqsToMPI  *merge;
4521   PetscContainer       container;
4522 
4523   PetscFunctionBegin;
4524   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4525   if (container) {
4526     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4535     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4536     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4537     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4538     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4539     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4540     ierr = PetscFree(merge);CHKERRQ(ierr);
4541     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4542   }
4543   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4544   PetscFunctionReturn(0);
4545 }
4546 
4547 #include <../src/mat/utils/freespace.h>
4548 #include <petscbt.h>
4549 
4550 #undef __FUNCT__
4551 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4552 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4553 {
4554   PetscErrorCode       ierr;
4555   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4556   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4557   PetscMPIInt          size,rank,taga,*len_s;
4558   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4559   PetscInt             proc,m;
4560   PetscInt             **buf_ri,**buf_rj;
4561   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4562   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4563   MPI_Request          *s_waits,*r_waits;
4564   MPI_Status           *status;
4565   MatScalar            *aa=a->a;
4566   MatScalar            **abuf_r,*ba_i;
4567   Mat_Merge_SeqsToMPI  *merge;
4568   PetscContainer       container;
4569 
4570   PetscFunctionBegin;
4571   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4572 
4573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4574   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4575 
4576   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4577   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4578 
4579   bi     = merge->bi;
4580   bj     = merge->bj;
4581   buf_ri = merge->buf_ri;
4582   buf_rj = merge->buf_rj;
4583 
4584   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4585   owners = merge->rowmap->range;
4586   len_s  = merge->len_s;
4587 
4588   /* send and recv matrix values */
4589   /*-----------------------------*/
4590   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4591   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4592 
4593   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4594   for (proc=0,k=0; proc<size; proc++){
4595     if (!len_s[proc]) continue;
4596     i = owners[proc];
4597     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4598     k++;
4599   }
4600 
4601   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4602   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4603   ierr = PetscFree(status);CHKERRQ(ierr);
4604 
4605   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4606   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4607 
4608   /* insert mat values of mpimat */
4609   /*----------------------------*/
4610   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4611   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4612 
4613   for (k=0; k<merge->nrecv; k++){
4614     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4615     nrows = *(buf_ri_k[k]);
4616     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4617     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4618   }
4619 
4620   /* set values of ba */
4621   m = merge->rowmap->n;
4622   for (i=0; i<m; i++) {
4623     arow = owners[rank] + i;
4624     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4625     bnzi = bi[i+1] - bi[i];
4626     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4627 
4628     /* add local non-zero vals of this proc's seqmat into ba */
4629     anzi = ai[arow+1] - ai[arow];
4630     aj   = a->j + ai[arow];
4631     aa   = a->a + ai[arow];
4632     nextaj = 0;
4633     for (j=0; nextaj<anzi; j++){
4634       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4635         ba_i[j] += aa[nextaj++];
4636       }
4637     }
4638 
4639     /* add received vals into ba */
4640     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4641       /* i-th row */
4642       if (i == *nextrow[k]) {
4643         anzi = *(nextai[k]+1) - *nextai[k];
4644         aj   = buf_rj[k] + *(nextai[k]);
4645         aa   = abuf_r[k] + *(nextai[k]);
4646         nextaj = 0;
4647         for (j=0; nextaj<anzi; j++){
4648           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4649             ba_i[j] += aa[nextaj++];
4650           }
4651         }
4652         nextrow[k]++; nextai[k]++;
4653       }
4654     }
4655     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4656   }
4657   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4658   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4659 
4660   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4661   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4662   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4663   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4664   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4665   PetscFunctionReturn(0);
4666 }
4667 
4668 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4669 
4670 #undef __FUNCT__
4671 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4672 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4673 {
4674   PetscErrorCode       ierr;
4675   Mat                  B_mpi;
4676   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4677   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4678   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4679   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4680   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4681   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4682   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4683   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4684   MPI_Status           *status;
4685   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4686   PetscBT              lnkbt;
4687   Mat_Merge_SeqsToMPI  *merge;
4688   PetscContainer       container;
4689 
4690   PetscFunctionBegin;
4691   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4692 
4693   /* make sure it is a PETSc comm */
4694   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4695   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4696   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4697 
4698   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4699   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4700 
4701   /* determine row ownership */
4702   /*---------------------------------------------------------*/
4703   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4704   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4705   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4706   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4707   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4708   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4709   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4710 
4711   m      = merge->rowmap->n;
4712   M      = merge->rowmap->N;
4713   owners = merge->rowmap->range;
4714 
4715   /* determine the number of messages to send, their lengths */
4716   /*---------------------------------------------------------*/
4717   len_s  = merge->len_s;
4718 
4719   len = 0;  /* length of buf_si[] */
4720   merge->nsend = 0;
4721   for (proc=0; proc<size; proc++){
4722     len_si[proc] = 0;
4723     if (proc == rank){
4724       len_s[proc] = 0;
4725     } else {
4726       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4727       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4728     }
4729     if (len_s[proc]) {
4730       merge->nsend++;
4731       nrows = 0;
4732       for (i=owners[proc]; i<owners[proc+1]; i++){
4733         if (ai[i+1] > ai[i]) nrows++;
4734       }
4735       len_si[proc] = 2*(nrows+1);
4736       len += len_si[proc];
4737     }
4738   }
4739 
4740   /* determine the number and length of messages to receive for ij-structure */
4741   /*-------------------------------------------------------------------------*/
4742   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4743   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4744 
4745   /* post the Irecv of j-structure */
4746   /*-------------------------------*/
4747   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4748   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4749 
4750   /* post the Isend of j-structure */
4751   /*--------------------------------*/
4752   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4753 
4754   for (proc=0, k=0; proc<size; proc++){
4755     if (!len_s[proc]) continue;
4756     i = owners[proc];
4757     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4758     k++;
4759   }
4760 
4761   /* receives and sends of j-structure are complete */
4762   /*------------------------------------------------*/
4763   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4764   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4765 
4766   /* send and recv i-structure */
4767   /*---------------------------*/
4768   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4769   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4770 
4771   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4772   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4773   for (proc=0,k=0; proc<size; proc++){
4774     if (!len_s[proc]) continue;
4775     /* form outgoing message for i-structure:
4776          buf_si[0]:                 nrows to be sent
4777                [1:nrows]:           row index (global)
4778                [nrows+1:2*nrows+1]: i-structure index
4779     */
4780     /*-------------------------------------------*/
4781     nrows = len_si[proc]/2 - 1;
4782     buf_si_i    = buf_si + nrows+1;
4783     buf_si[0]   = nrows;
4784     buf_si_i[0] = 0;
4785     nrows = 0;
4786     for (i=owners[proc]; i<owners[proc+1]; i++){
4787       anzi = ai[i+1] - ai[i];
4788       if (anzi) {
4789         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4790         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4791         nrows++;
4792       }
4793     }
4794     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4795     k++;
4796     buf_si += len_si[proc];
4797   }
4798 
4799   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4800   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4801 
4802   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4803   for (i=0; i<merge->nrecv; i++){
4804     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4805   }
4806 
4807   ierr = PetscFree(len_si);CHKERRQ(ierr);
4808   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4809   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4810   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4811   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4812   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4813   ierr = PetscFree(status);CHKERRQ(ierr);
4814 
4815   /* compute a local seq matrix in each processor */
4816   /*----------------------------------------------*/
4817   /* allocate bi array and free space for accumulating nonzero column info */
4818   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4819   bi[0] = 0;
4820 
4821   /* create and initialize a linked list */
4822   nlnk = N+1;
4823   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4824 
4825   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4826   len = 0;
4827   len  = ai[owners[rank+1]] - ai[owners[rank]];
4828   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4829   current_space = free_space;
4830 
4831   /* determine symbolic info for each local row */
4832   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4833 
4834   for (k=0; k<merge->nrecv; k++){
4835     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4836     nrows = *buf_ri_k[k];
4837     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4838     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4839   }
4840 
4841   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4842   len = 0;
4843   for (i=0;i<m;i++) {
4844     bnzi   = 0;
4845     /* add local non-zero cols of this proc's seqmat into lnk */
4846     arow   = owners[rank] + i;
4847     anzi   = ai[arow+1] - ai[arow];
4848     aj     = a->j + ai[arow];
4849     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4850     bnzi += nlnk;
4851     /* add received col data into lnk */
4852     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4853       if (i == *nextrow[k]) { /* i-th row */
4854         anzi = *(nextai[k]+1) - *nextai[k];
4855         aj   = buf_rj[k] + *nextai[k];
4856         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4857         bnzi += nlnk;
4858         nextrow[k]++; nextai[k]++;
4859       }
4860     }
4861     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4862 
4863     /* if free space is not available, make more free space */
4864     if (current_space->local_remaining<bnzi) {
4865       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4866       nspacedouble++;
4867     }
4868     /* copy data into free space, then initialize lnk */
4869     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4870     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4871 
4872     current_space->array           += bnzi;
4873     current_space->local_used      += bnzi;
4874     current_space->local_remaining -= bnzi;
4875 
4876     bi[i+1] = bi[i] + bnzi;
4877   }
4878 
4879   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4880 
4881   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4882   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4883   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4884 
4885   /* create symbolic parallel matrix B_mpi */
4886   /*---------------------------------------*/
4887   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4888   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4889   if (n==PETSC_DECIDE) {
4890     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4891   } else {
4892     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4893   }
4894   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4895   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4896   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4897   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4898   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4899 
4900   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4901   B_mpi->assembled     = PETSC_FALSE;
4902   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4903   merge->bi            = bi;
4904   merge->bj            = bj;
4905   merge->buf_ri        = buf_ri;
4906   merge->buf_rj        = buf_rj;
4907   merge->coi           = PETSC_NULL;
4908   merge->coj           = PETSC_NULL;
4909   merge->owners_co     = PETSC_NULL;
4910 
4911   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4912 
4913   /* attach the supporting struct to B_mpi for reuse */
4914   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4915   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4916   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4917   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4918   *mpimat = B_mpi;
4919 
4920   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4921   PetscFunctionReturn(0);
4922 }
4923 
4924 #undef __FUNCT__
4925 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4926 /*@C
4927       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4928                  matrices from each processor
4929 
4930     Collective on MPI_Comm
4931 
4932    Input Parameters:
4933 +    comm - the communicator the parallel matrix will live on
4934 .    seqmat - the input sequential matrix on each process
4935 .    m - number of local rows (or PETSC_DECIDE)
4936 .    n - number of local columns (or PETSC_DECIDE)
4937 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4938 
4939    Output Parameter:
4940 .    mpimat - the parallel matrix generated
4941 
4942     Level: advanced
4943 
4944    Notes:
4945      The dimensions of the sequential matrix in each processor MUST be the same.
4946      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4947      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
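
   A usage sketch (assuming each process holds an identically sized SeqAIJ matrix
   A_seq whose entries are to be summed; error checking omitted):
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve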
4948 @*/
4949 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4950 {
4951   PetscErrorCode   ierr;
4952   PetscMPIInt     size;
4953 
4954   PetscFunctionBegin;
4955   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4956   if (size == 1){
4957      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4958      if (scall == MAT_INITIAL_MATRIX){
4959        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4960      } else {
4961        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4962      }
4963      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4964      PetscFunctionReturn(0);
4965   }
4966   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967   if (scall == MAT_INITIAL_MATRIX){
4968     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4969   }
4970   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4971   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4972   PetscFunctionReturn(0);
4973 }
4974 
4975 #undef __FUNCT__
4976 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4977 /*@
4978      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4979           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4980           with MatGetSize().
4981 
4982     Not Collective
4983 
4984    Input Parameters:
4985 +    A - the matrix
4986 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4987 
4988    Output Parameter:
4989 .    A_loc - the local sequential matrix generated
4990 
4991     Level: developer
4992 
4993 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4994 
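   A usage sketch (error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
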
4995 @*/
4996 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4997 {
4998   PetscErrorCode  ierr;
4999   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5000   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5001   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5002   MatScalar       *aa=a->a,*ba=b->a,*cam;
5003   PetscScalar     *ca;
5004   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5005   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5006   PetscBool       match;
5007 
5008   PetscFunctionBegin;
5009   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5010   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5011   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5012   if (scall == MAT_INITIAL_MATRIX){
5013     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5014     ci[0] = 0;
5015     for (i=0; i<am; i++){
5016       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5017     }
5018     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5019     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5020     k = 0;
5021     for (i=0; i<am; i++) {
5022       ncols_o = bi[i+1] - bi[i];
5023       ncols_d = ai[i+1] - ai[i];
5024       /* off-diagonal portion of A */
5025       for (jo=0; jo<ncols_o; jo++) {
5026         col = cmap[*bj];
5027         if (col >= cstart) break;
5028         cj[k]   = col; bj++;
5029         ca[k++] = *ba++;
5030       }
5031       /* diagonal portion of A */
5032       for (j=0; j<ncols_d; j++) {
5033         cj[k]   = cstart + *aj++;
5034         ca[k++] = *aa++;
5035       }
5036       /* off-diagonal portion of A */
5037       for (j=jo; j<ncols_o; j++) {
5038         cj[k]   = cmap[*bj++];
5039         ca[k++] = *ba++;
5040       }
5041     }
5042     /* put together the new matrix */
5043     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5044     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5045     /* Since these are PETSc arrays, change flags to free them as necessary. */
5046     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5047     mat->free_a  = PETSC_TRUE;
5048     mat->free_ij = PETSC_TRUE;
5049     mat->nonew   = 0;
5050   } else if (scall == MAT_REUSE_MATRIX){
5051     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5052     ci = mat->i; cj = mat->j; cam = mat->a;
5053     for (i=0; i<am; i++) {
5054       /* off-diagonal portion of A */
5055       ncols_o = bi[i+1] - bi[i];
5056       for (jo=0; jo<ncols_o; jo++) {
5057         col = cmap[*bj];
5058         if (col >= cstart) break;
5059         *cam++ = *ba++; bj++;
5060       }
5061       /* diagonal portion of A */
5062       ncols_d = ai[i+1] - ai[i];
5063       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5064       /* off-diagonal portion of A */
5065       for (j=jo; j<ncols_o; j++) {
5066         *cam++ = *ba++; bj++;
5067       }
5068     }
5069   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5070   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5071   PetscFunctionReturn(0);
5072 }
5073 
5074 #undef __FUNCT__
5075 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5076 /*@C
5077      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5078 
5079     Not Collective
5080 
5081    Input Parameters:
5082 +    A - the matrix
5083 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5084 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5085 
5086    Output Parameter:
5087 .    A_loc - the local sequential matrix generated
5088 
5089     Level: developer
5090 
5091 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5092 
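   A usage sketch extracting all local rows and nonzero columns (error checking
   omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
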
5093 @*/
5094 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5095 {
5096   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5097   PetscErrorCode    ierr;
5098   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5099   IS                isrowa,iscola;
5100   Mat               *aloc;
5101   PetscBool       match;
5102 
5103   PetscFunctionBegin;
5104   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5105   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5106   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5107   if (!row){
5108     start = A->rmap->rstart; end = A->rmap->rend;
5109     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5110   } else {
5111     isrowa = *row;
5112   }
5113   if (!col){
5114     start = A->cmap->rstart;
5115     cmap  = a->garray;
5116     nzA   = a->A->cmap->n;
5117     nzB   = a->B->cmap->n;
5118     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5119     ncols = 0;
5120     for (i=0; i<nzB; i++) {
5121       if (cmap[i] < start) idx[ncols++] = cmap[i];
5122       else break;
5123     }
5124     imark = i;
5125     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5126     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5127     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5128   } else {
5129     iscola = *col;
5130   }
5131   if (scall != MAT_INITIAL_MATRIX){
5132     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5133     aloc[0] = *A_loc;
5134   }
5135   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5136   *A_loc = aloc[0];
5137   ierr = PetscFree(aloc);CHKERRQ(ierr);
5138   if (!row){
5139     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5140   }
5141   if (!col){
5142     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5143   }
5144   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5145   PetscFunctionReturn(0);
5146 }
5147 
5148 #undef __FUNCT__
5149 #define __FUNCT__ "MatGetBrowsOfAcols"
5150 /*@C
5151     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5152 
5153     Collective on Mat
5154 
5155    Input Parameters:
5156 +    A,B - the matrices in mpiaij format
5157 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5158 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5159 
5160    Output Parameter:
5161 +    rowb, colb - index sets of rows and columns of B to extract
5162 -    B_seq - the sequential matrix generated
5163 
5164     Level: developer
5165 
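   A usage sketch where the index sets are created on the first call and reused
   afterwards (error checking omitted):
.vb
     IS  rowb = PETSC_NULL, colb = PETSC_NULL;
     Mat B_seq = PETSC_NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... B changes numerically ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
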
5166 @*/
5167 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5168 {
5169   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5170   PetscErrorCode    ierr;
5171   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5172   IS                isrowb,iscolb;
5173   Mat               *bseq=PETSC_NULL;
5174 
5175   PetscFunctionBegin;
5176   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5177     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5178   }
5179   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5180 
5181   if (scall == MAT_INITIAL_MATRIX){
5182     start = A->cmap->rstart;
5183     cmap  = a->garray;
5184     nzA   = a->A->cmap->n;
5185     nzB   = a->B->cmap->n;
5186     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5187     ncols = 0;
5188     for (i=0; i<nzB; i++) {  /* row < local row index */
5189       if (cmap[i] < start) idx[ncols++] = cmap[i];
5190       else break;
5191     }
5192     imark = i;
5193     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5194     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5195     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5196     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5197   } else {
5198     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5199     isrowb = *rowb; iscolb = *colb;
5200     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5201     bseq[0] = *B_seq;
5202   }
5203   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5204   *B_seq = bseq[0];
5205   ierr = PetscFree(bseq);CHKERRQ(ierr);
5206   if (!rowb){
5207     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5208   } else {
5209     *rowb = isrowb;
5210   }
5211   if (!colb){
5212     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5213   } else {
5214     *colb = iscolb;
5215   }
5216   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5217   PetscFunctionReturn(0);
5218 }
5219 
5220 #undef __FUNCT__
5221 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5222 /*
5223     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5224     of the OFF-DIAGONAL portion of local A
5225 
5226     Collective on Mat
5227 
5228    Input Parameters:
5229 +    A,B - the matrices in mpiaij format
5230 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5231 
5232    Output Parameter:
5233 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5234 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5235 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5236 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5237 
5238     Level: developer
5239 
5240 */
5241 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5242 {
5243   VecScatter_MPI_General *gen_to,*gen_from;
5244   PetscErrorCode         ierr;
5245   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5246   Mat_SeqAIJ             *b_oth;
5247   VecScatter             ctx=a->Mvctx;
5248   MPI_Comm               comm=((PetscObject)ctx)->comm;
5249   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5250   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5251   PetscScalar            *rvalues,*svalues;
5252   MatScalar              *b_otha,*bufa,*bufA;
5253   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5254   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5255   MPI_Status             *sstatus,rstatus;
5256   PetscMPIInt            jj;
5257   PetscInt               *cols,sbs,rbs;
5258   PetscScalar            *vals;
5259 
5260   PetscFunctionBegin;
5261   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5262     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5263   }
5264   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5265   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5266 
5267   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5268   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5269   rvalues  = gen_from->values; /* holds the length of receiving row */
5270   svalues  = gen_to->values;   /* holds the length of sending row */
5271   nrecvs   = gen_from->n;
5272   nsends   = gen_to->n;
5273 
5274   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5275   srow     = gen_to->indices;   /* local row index to be sent */
5276   sstarts  = gen_to->starts;
5277   sprocs   = gen_to->procs;
5278   sstatus  = gen_to->sstatus;
5279   sbs      = gen_to->bs;
5280   rstarts  = gen_from->starts;
5281   rprocs   = gen_from->procs;
5282   rbs      = gen_from->bs;
5283 
5284   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5285   if (scall == MAT_INITIAL_MATRIX){
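    /* Overview of the MAT_INITIAL_MATRIX setup (descriptive note): the needed rows of B are
       exchanged over the existing VecScatter communication pattern in three passes that reuse
       the same sends/receives: (1) an i-array pass sends the length of every requested row so
       each process can build b_othi and size its buffers, (2) a j-array pass sends the global
       column indices into b_othj, and (3) an a-array pass (performed further below for both
       MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX) sends the numerical values into b_otha. */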
5286     /* i-array */
5287     /*---------*/
5288     /*  post receives */
5289     for (i=0; i<nrecvs; i++){
5290       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5291       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5292       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5293     }
5294 
5295     /* pack the outgoing message */
5296     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5297     sstartsj[0] = 0;  rstartsj[0] = 0;
5298     len = 0; /* total length of j or a array to be sent */
5299     k = 0;
5300     for (i=0; i<nsends; i++){
5301       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5302       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5303       for (j=0; j<nrows; j++) {
5304         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5305         for (l=0; l<sbs; l++){
5306           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5307           rowlen[j*sbs+l] = ncols;
5308           len += ncols;
5309           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5310         }
5311         k++;
5312       }
5313       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5314       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5315     }
5316     /* wait until the recvs and sends of the i-array are complete */
5317     i = nrecvs;
5318     while (i--) {
5319       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5320     }
5321     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5322 
5323     /* allocate buffers for sending j and a arrays */
5324     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5325     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5326 
5327     /* create i-array of B_oth */
5328     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5329     b_othi[0] = 0;
5330     len = 0; /* total length of j or a array to be received */
5331     k = 0;
5332     for (i=0; i<nrecvs; i++){
5333       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5334       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5335       for (j=0; j<nrows; j++) {
5336         b_othi[k+1] = b_othi[k] + rowlen[j];
5337         len += rowlen[j]; k++;
5338       }
5339       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5340     }
5341 
5342     /* allocate space for j and a arrays of B_oth */
5343     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5344     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5345 
5346     /* j-array */
5347     /*---------*/
5348     /*  post receives of j-array */
5349     for (i=0; i<nrecvs; i++){
5350       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5351       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5352     }
5353 
5354     /* pack the outgoing message j-array */
5355     k = 0;
5356     for (i=0; i<nsends; i++){
5357       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5358       bufJ = bufj+sstartsj[i];
5359       for (j=0; j<nrows; j++) {
5360         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5361         for (ll=0; ll<sbs; ll++){
5362           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5363           for (l=0; l<ncols; l++){
5364             *bufJ++ = cols[l];
5365           }
5366           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5367         }
5368       }
5369       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5370     }
5371 
5372     /* wait until the recvs and sends of the j-array are complete */
5373     i = nrecvs;
5374     while (i--) {
5375       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5376     }
5377     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5378   } else if (scall == MAT_REUSE_MATRIX){
5379     sstartsj = *startsj_s;
5380     rstartsj = *startsj_r;
5381     bufa     = *bufa_ptr;
5382     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5383     b_otha   = b_oth->a;
5384   } else {
5385     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5386   }
5387 
5388   /* a-array */
5389   /*---------*/
5390   /*  post receives of a-array */
5391   for (i=0; i<nrecvs; i++){
5392     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5393     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5394   }
5395 
5396   /* pack the outgoing message a-array */
5397   k = 0;
5398   for (i=0; i<nsends; i++){
5399     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5400     bufA = bufa+sstartsj[i];
5401     for (j=0; j<nrows; j++) {
5402       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5403       for (ll=0; ll<sbs; ll++){
5404         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5405         for (l=0; l<ncols; l++){
5406           *bufA++ = vals[l];
5407         }
5408         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5409       }
5410     }
5411     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5412   }
5413   /* wait until the recvs and sends of the a-array are complete */
5414   i = nrecvs;
5415   while (i--) {
5416     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5417   }
5418   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5419   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5420 
5421   if (scall == MAT_INITIAL_MATRIX){
5422     /* put together the new matrix */
5423     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5424 
5425     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5426     /* Since these are PETSc arrays, change flags to free them as necessary. */
5427     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5428     b_oth->free_a  = PETSC_TRUE;
5429     b_oth->free_ij = PETSC_TRUE;
5430     b_oth->nonew   = 0;
5431 
5432     ierr = PetscFree(bufj);CHKERRQ(ierr);
5433     if (!startsj_s || !bufa_ptr){
5434       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5435       ierr = PetscFree(bufa);CHKERRQ(ierr);
5436     } else {
5437       *startsj_s = sstartsj;
5438       *startsj_r = rstartsj;
5439       *bufa_ptr  = bufa;
5440     }
5441   }
5442   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5443   PetscFunctionReturn(0);
5444 }
5445 
5446 #undef __FUNCT__
5447 #define __FUNCT__ "MatGetCommunicationStructs"
5448 /*@C
5449   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5450 
5451   Not Collective
5452 
5453   Input Parameter:
5454 . A - The matrix in mpiaij format
5455 
5456   Output Parameters:
5457 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5458 . colmap - A map from global column index to local index into lvec
5459 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5460 
5461   Level: developer
5462 
5463 @*/
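/*
   A minimal usage sketch (hypothetical caller code, not part of this file).  Note that the
   type of colmap depends on whether PETSc was configured with ctable support:

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/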
5464 #if defined (PETSC_USE_CTABLE)
5465 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5466 #else
5467 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5468 #endif
5469 {
5470   Mat_MPIAIJ *a;
5471 
5472   PetscFunctionBegin;
5473   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5474   PetscValidPointer(lvec, 2);
5475   PetscValidPointer(colmap, 3);
5476   PetscValidPointer(multScatter, 4);
5477   a = (Mat_MPIAIJ *) A->data;
5478   if (lvec) *lvec = a->lvec;
5479   if (colmap) *colmap = a->colmap;
5480   if (multScatter) *multScatter = a->Mvctx;
5481   PetscFunctionReturn(0);
5482 }
5483 
5484 EXTERN_C_BEGIN
5485 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5486 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5487 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5488 EXTERN_C_END
5489 
5490 #undef __FUNCT__
5491 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5492 /*
5493     Computes C = A*B as (B'*A')', since a direct MPIDense*MPIAIJ product is not available
5494 
5495                n                       p                          p
5496         (              )       (              )         (                  )
5497       m (      A       )  *  n (       B      )   =   m (         C        )
5498         (              )       (              )         (                  )
5499 
5500 */
5501 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5502 {
5503   PetscErrorCode     ierr;
5504   Mat                At,Bt,Ct;
5505 
5506   PetscFunctionBegin;
5507   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5508   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5509   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5510   ierr = MatDestroy(&At);CHKERRQ(ierr);
5511   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5512   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5513   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 #undef __FUNCT__
5518 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
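/*
   Symbolic stage of C = A*B for A of type MPIDense and B of type MPIAIJ: creates an MPIDense
   C with A's local row layout and B's local column layout, assembles it empty, and installs
   MatMatMult_MPIDense_MPIAIJ as C's matmult operation.
*/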
5519 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5520 {
5521   PetscErrorCode ierr;
5522   PetscInt       m=A->rmap->n,n=B->cmap->n;
5523   Mat            Cmat;
5524 
5525   PetscFunctionBegin;
5526   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5527   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5528   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5529   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5530   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5531   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5532   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5533   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5534   *C   = Cmat;
5535   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5536   PetscFunctionReturn(0);
5537 }
5538 
5539 /* ----------------------------------------------------------------*/
5540 #undef __FUNCT__
5541 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5542 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5543 {
5544   PetscErrorCode ierr;
5545 
5546   PetscFunctionBegin;
5547   if (scall == MAT_INITIAL_MATRIX){
5548     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5549   }
5550   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5551   PetscFunctionReturn(0);
5552 }
5553 
5554 EXTERN_C_BEGIN
5555 #if defined(PETSC_HAVE_MUMPS)
5556 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5557 #endif
5558 #if defined(PETSC_HAVE_PASTIX)
5559 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5560 #endif
5561 #if defined(PETSC_HAVE_SUPERLU_DIST)
5562 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5563 #endif
5564 #if defined(PETSC_HAVE_CLIQUE)
5565 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5566 #endif
5567 EXTERN_C_END
5568 
5569 /*MC
5570    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5571 
5572    Options Database Keys:
5573 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5574 
5575   Level: beginner
5576 
5577 .seealso: MatCreateAIJ()
5578 M*/
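/*
   A minimal creation sketch for this type (hypothetical application code; the global sizes M,N
   and the preallocation counts are placeholders only):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ....  MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd()  ....
*/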
5579 
5580 EXTERN_C_BEGIN
5581 #undef __FUNCT__
5582 #define __FUNCT__ "MatCreate_MPIAIJ"
5583 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5584 {
5585   Mat_MPIAIJ     *b;
5586   PetscErrorCode ierr;
5587   PetscMPIInt    size;
5588 
5589   PetscFunctionBegin;
5590   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5591   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5592   B->data         = (void*)b;
5593   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5594   B->assembled    = PETSC_FALSE;
5595   B->insertmode   = NOT_SET_VALUES;
5596   b->size         = size;
5597   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5598 
5599   /* build cache (stash) for off-processor entries generated by MatSetValues() */
5600   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5601   b->donotstash  = PETSC_FALSE;
5602   b->colmap      = 0;
5603   b->garray      = 0;
5604   b->roworiented = PETSC_TRUE;
5605 
5606   /* stuff used for matrix vector multiply */
5607   b->lvec      = PETSC_NULL;
5608   b->Mvctx     = PETSC_NULL;
5609 
5610   /* stuff for MatGetRow() */
5611   b->rowindices   = 0;
5612   b->rowvalues    = 0;
5613   b->getrowactive = PETSC_FALSE;
5614 
5615   /* flexible pointer used in CUSP/CUSPARSE classes */
5616   b->spptr        = PETSC_NULL;
5617 
5618 #if defined(PETSC_HAVE_MUMPS)
5619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5620                                      "MatGetFactor_aij_mumps",
5621                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5622 #endif
5623 #if defined(PETSC_HAVE_PASTIX)
5624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5625 					   "MatGetFactor_mpiaij_pastix",
5626 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5627 #endif
5628 #if defined(PETSC_HAVE_SUPERLU_DIST)
5629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5630                                      "MatGetFactor_mpiaij_superlu_dist",
5631                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5632 #endif
5633 #if defined(PETSC_HAVE_CLIQUE)
5634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5635                                      "MatGetFactor_aij_clique",
5636                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5637 #endif
5638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5639                                      "MatStoreValues_MPIAIJ",
5640                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5642                                      "MatRetrieveValues_MPIAIJ",
5643                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5644   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5645 				     "MatGetDiagonalBlock_MPIAIJ",
5646                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5648 				     "MatIsTranspose_MPIAIJ",
5649 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5650   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5651 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5652 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5654 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5655 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5657 				     "MatDiagonalScaleLocal_MPIAIJ",
5658 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5660                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5661                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5663                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5664                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5666                                      "MatConvert_MPIAIJ_MPISBAIJ",
5667                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5669                                      "MatMatMult_MPIDense_MPIAIJ",
5670                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5672                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5673                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5675                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5676                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5677   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5678   PetscFunctionReturn(0);
5679 }
5680 EXTERN_C_END
5681 
5682 #undef __FUNCT__
5683 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5684 /*@
5685      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5686          and "off-diagonal" part of the matrix in CSR format.
5687 
5688    Collective on MPI_Comm
5689 
5690    Input Parameters:
5691 +  comm - MPI communicator
5692 .  m - number of local rows (Cannot be PETSC_DECIDE)
5693 .  n - This value should be the same as the local size used in creating the
5694        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5695        calculated if N is given). For square matrices n is almost always m.
5696 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5697 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5698 .   i - row indices for "diagonal" portion of matrix
5699 .   j - column indices
5700 .   a - matrix values
5701 .   oi - row indices for "off-diagonal" portion of matrix
5702 .   oj - column indices
5703 -   oa - matrix values
5704 
5705    Output Parameter:
5706 .   mat - the matrix
5707 
5708    Level: advanced
5709 
5710    Notes:
5711        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5712        must free the arrays once the matrix has been destroyed and not before.
5713 
5714        The i and j indices are 0 based
5715 
5716        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5717 
5718        This sets local rows and cannot be used to set off-processor values.
5719 
5720        You cannot later use MatSetValues() to change values in this matrix.
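
       A minimal illustration (the concrete arrays below are hypothetical, for a uniprocessor run
       with a 2x2 diagonal block and an empty off-diagonal block):
.vb
       PetscInt    i[]  = {0,1,2}, j[]  = {0,1}; PetscScalar a[]  = {1.0,2.0};
       PetscInt    oi[] = {0,0,0}, oj[] = {0};   PetscScalar oa[] = {0.0};
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve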
5721 
5722 .keywords: matrix, aij, compressed row, sparse, parallel
5723 
5724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5725           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5726 @*/
5727 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5728 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5729 {
5730   PetscErrorCode ierr;
5731   Mat_MPIAIJ     *maij;
5732 
5733   PetscFunctionBegin;
5734   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5735   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5736   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5737   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5738   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5739   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5740   maij = (Mat_MPIAIJ*) (*mat)->data;
5741   maij->donotstash     = PETSC_TRUE;
5742   (*mat)->preallocated = PETSC_TRUE;
5743 
5744   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5745   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5746 
5747   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5748   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5749 
5750   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5753   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5754 
5755   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5756   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5757   PetscFunctionReturn(0);
5758 }
5759 
5760 /*
5761     Special version for direct calls from Fortran
5762 */
5763 #include <petsc-private/fortranimpl.h>
5764 
5765 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5766 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5767 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5768 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5769 #endif
5770 
5771 /* Change these macros so that they can be used in a void function; a void Fortran stub cannot return an error code, so errors abort instead */
5772 #undef CHKERRQ
5773 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5774 #undef SETERRQ2
5775 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5776 #undef SETERRQ3
5777 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5778 #undef SETERRQ
5779 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5780 
5781 EXTERN_C_BEGIN
5782 #undef __FUNCT__
5783 #define __FUNCT__ "matsetvaluesmpiaij_"
5784 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5785 {
5786   Mat             mat = *mmat;
5787   PetscInt        m = *mm, n = *mn;
5788   InsertMode      addv = *maddv;
5789   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5790   PetscScalar     value;
5791   PetscErrorCode  ierr;
5792 
5793   MatCheckPreallocated(mat,1);
5794   if (mat->insertmode == NOT_SET_VALUES) {
5795     mat->insertmode = addv;
5796   }
5797 #if defined(PETSC_USE_DEBUG)
5798   else if (mat->insertmode != addv) {
5799     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5800   }
5801 #endif
5802   {
5803   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5804   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5805   PetscBool       roworiented = aij->roworiented;
5806 
5807   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5808   Mat             A = aij->A;
5809   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5810   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5811   MatScalar       *aa = a->a;
5812   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5813   Mat             B = aij->B;
5814   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5815   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5816   MatScalar       *ba = b->a;
5817 
5818   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5819   PetscInt        nonew = a->nonew;
5820   MatScalar       *ap1,*ap2;
5821 
5822   PetscFunctionBegin;
5823   for (i=0; i<m; i++) {
5824     if (im[i] < 0) continue;
5825 #if defined(PETSC_USE_DEBUG)
5826     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5827 #endif
5828     if (im[i] >= rstart && im[i] < rend) {
5829       row      = im[i] - rstart;
5830       lastcol1 = -1;
5831       rp1      = aj + ai[row];
5832       ap1      = aa + ai[row];
5833       rmax1    = aimax[row];
5834       nrow1    = ailen[row];
5835       low1     = 0;
5836       high1    = nrow1;
5837       lastcol2 = -1;
5838       rp2      = bj + bi[row];
5839       ap2      = ba + bi[row];
5840       rmax2    = bimax[row];
5841       nrow2    = bilen[row];
5842       low2     = 0;
5843       high2    = nrow2;
5844 
5845       for (j=0; j<n; j++) {
5846         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5847         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5848         if (in[j] >= cstart && in[j] < cend){
5849           col = in[j] - cstart;
5850           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5851         } else if (in[j] < 0) continue;
5852 #if defined(PETSC_USE_DEBUG)
5853         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5854 #endif
5855         else {
5856           if (mat->was_assembled) {
5857             if (!aij->colmap) {
5858               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5859             }
5860 #if defined (PETSC_USE_CTABLE)
5861             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5862 	    col--;
5863 #else
5864             col = aij->colmap[in[j]] - 1;
5865 #endif
5866             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5867               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5868               col =  in[j];
5869               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5870               B = aij->B;
5871               b = (Mat_SeqAIJ*)B->data;
5872               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5873               rp2      = bj + bi[row];
5874               ap2      = ba + bi[row];
5875               rmax2    = bimax[row];
5876               nrow2    = bilen[row];
5877               low2     = 0;
5878               high2    = nrow2;
5879               bm       = aij->B->rmap->n;
5880               ba = b->a;
5881             }
5882           } else col = in[j];
5883           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5884         }
5885       }
5886     } else {
5887       if (!aij->donotstash) {
5888         if (roworiented) {
5889           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5890         } else {
5891           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5892         }
5893       }
5894     }
5895   }}
5896   PetscFunctionReturnVoid();
5897 }
5898 EXTERN_C_END
5899 
5900