xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 99e65526ab8c7f53260480e236965c949e23a406)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to using inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine number diagonal and off-diagonal counts */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine number diagonal and off-diagonal counts */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each processor
367 has an order N integer array but is fast to acess.
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at (row,col) of the block
   described by the enclosing function's "A" variables.

   The macro is not self-contained. It reads and updates these names from the scope in
   which it is expanded: rp1, ap1 (column indices and values of the row), nrow1, rmax1,
   low1, high1, lastcol1, t, _i, ii, N, nonew, ignorezeroentries, a, A, am, aa, ai, aj,
   aimax and ailen. It jumps to the label a_noinsert, so it can be expanded only once
   per function.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    /* reuse the search window left by the previous column when the columns are increasing */ \
    if (col > lastcol1) high1 = nrow1; \
    else                low1  = 0; \
    lastcol1 = col; \
    /* bisection down to a window of at most five entries */ \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    /* linear scan of the window; on exit without a match _i is the insertion position */ \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    /* the location is new: decide whether it may be created */ \
    if (value == 0.0 && ignorezeroentries) { \
      low1 = 0; high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }
426 
427 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value at (row,col) of the block
   described by the enclosing function's "B" variables.

   The macro is not self-contained. It reads and updates these names from the scope in
   which it is expanded: rp2, ap2 (column indices and values of the row), nrow2, rmax2,
   low2, high2, lastcol2, t, _i, ii, N, nonew, ignorezeroentries, b, B, bm, ba, bi, bj,
   bimax and bilen. It jumps to the label b_noinsert, so it can be expanded only once
   per function.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    /* reuse the search window left by the previous column when the columns are increasing */ \
    if (col > lastcol2) high2 = nrow2; \
    else                low2  = 0; \
    lastcol2 = col; \
    /* bisection down to a window of at most five entries */ \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    /* linear scan of the window; on exit without a match _i is the insertion position */ \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    /* the location is new: decide whether it may be created */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled, if so we must
707      also disassemble ourselfs, in order that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscMPIInt    size   = mat->size;
765   PetscSF        sf;
766   PetscInt      *lrows;
767   PetscSFNode   *rrows;
768   PetscInt       lastidx = -1, r, p = 0, len = 0;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   /* Create SF where leaves are input rows and roots are owned rows */
773   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
774   for (r = 0; r < n; ++r) lrows[r] = -1;
775   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
776   for (r = 0; r < N; ++r) {
777     const PetscInt idx   = rows[r];
778     PetscBool      found = PETSC_FALSE;
779     /* Trick for efficient searching for sorted rows */
780     if (lastidx > idx) p = 0;
781     lastidx = idx;
782     for (; p < size; ++p) {
783       if (idx >= owners[p] && idx < owners[p+1]) {
784         rrows[r].rank  = p;
785         rrows[r].index = rows[r] - owners[p];
786         found = PETSC_TRUE;
787         break;
788       }
789     }
790     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
791   }
792   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
793   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
794   /* Collect flags for rows to be zeroed */
795   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
796   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
797   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
798   /* Compress and put in row numbers */
799   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
800   /* fix right hand side if needed */
801   if (x && b) {
802     const PetscScalar *xx;
803     PetscScalar       *bb;
804 
805     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
806     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
807     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
808     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
809     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
810   }
811   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
812   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
813   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
814     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
815   } else if (diag != 0.0) {
816     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
817     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
818     for (r = 0; r < len; ++r) {
819       const PetscInt row = lrows[r] + A->rmap->rstart;
820       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
821     }
822     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824   } else {
825     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(lrows);CHKERRQ(ierr);
828 
829   /* only change matrix nonzero state if pattern was allowed to be changed */
830   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
831     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
832     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
833   }
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
839 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
840 {
841   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
842   PetscErrorCode    ierr;
843   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
844   PetscInt          i,j,r,m,p = 0,len = 0;
845   PetscInt          *lrows,*owners = A->rmap->range;
846   PetscSFNode       *rrows;
847   PetscSF           sf;
848   const PetscScalar *xx;
849   PetscScalar       *bb,*mask;
850   Vec               xmask,lmask;
851   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
852   const PetscInt    *aj, *ii,*ridx;
853   PetscScalar       *aa;
854 #if defined(PETSC_DEBUG)
855   PetscBool found = PETSC_FALSE;
856 #endif
857 
858   PetscFunctionBegin;
859   /* Create SF where leaves are input rows and roots are owned rows */
860   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
861   for (r = 0; r < n; ++r) lrows[r] = -1;
862   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
863   for (r = 0; r < N; ++r) {
864     const PetscInt idx   = rows[r];
865     PetscBool      found = PETSC_FALSE;
866     /* Trick for efficient searching for sorted rows */
867     if (lastidx > idx) p = 0;
868     lastidx = idx;
869     for (; p < size; ++p) {
870       if (idx >= owners[p] && idx < owners[p+1]) {
871         rrows[r].rank  = p;
872         rrows[r].index = rows[r] - owners[p];
873         found = PETSC_TRUE;
874         break;
875       }
876     }
877     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
878   }
879   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
880   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
881   /* Collect flags for rows to be zeroed */
882   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
883   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
884   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
885   /* Compress and put in row numbers */
886   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
887   /* zero diagonal part of matrix */
888   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
889   /* handle off diagonal part of matrix */
890   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
891   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
892   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
893   for (i=0; i<len; i++) bb[lrows[i]] = 1;
894   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
895   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
898   if (x) {
899     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
902     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
903   }
904   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
905   /* remove zeroed rows of off diagonal matrix */
906   ii = aij->i;
907   for (i=0; i<len; i++) {
908     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
909   }
910   /* loop over all elements of off process part of matrix zeroing removed columns*/
911   if (aij->compressedrow.use) {
912     m    = aij->compressedrow.nrows;
913     ii   = aij->compressedrow.i;
914     ridx = aij->compressedrow.rindex;
915     for (i=0; i<m; i++) {
916       n  = ii[i+1] - ii[i];
917       aj = aij->j + ii[i];
918       aa = aij->a + ii[i];
919 
920       for (j=0; j<n; j++) {
921         if (PetscAbsScalar(mask[*aj])) {
922           if (b) bb[*ridx] -= *aa*xx[*aj];
923           *aa = 0.0;
924         }
925         aa++;
926         aj++;
927       }
928       ridx++;
929     }
930   } else { /* do not use compressed row format */
931     m = l->B->rmap->n;
932     for (i=0; i<m; i++) {
933       n  = ii[i+1] - ii[i];
934       aj = aij->j + ii[i];
935       aa = aij->a + ii[i];
936       for (j=0; j<n; j++) {
937         if (PetscAbsScalar(mask[*aj])) {
938           if (b) bb[i] -= *aa*xx[*aj];
939           *aa = 0.0;
940         }
941         aa++;
942         aj++;
943       }
944     }
945   }
946   if (x) {
947     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
948     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
949   }
950   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
951   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
952   ierr = PetscFree(lrows);CHKERRQ(ierr);
953 
954   /* only change matrix nonzero state if pattern was allowed to be changed */
955   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
956     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
957     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
958   }
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMult_MPIAIJ"
964 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
965 {
966   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
967   PetscErrorCode ierr;
968   PetscInt       nt;
969 
970   PetscFunctionBegin;
971   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
972   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
973   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
975   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatMultAdd_MPIAIJ"
994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1009 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1010 {
1011   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1012   PetscErrorCode ierr;
1013   PetscBool      merged;
1014 
1015   PetscFunctionBegin;
1016   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1017   /* do nondiagonal part */
1018   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1019   if (!merged) {
1020     /* send it on its way */
1021     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1022     /* do local part */
1023     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1024     /* receive remote parts: note this assumes the values are not actually */
1025     /* added in yy until the next line, */
1026     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1027   } else {
1028     /* do local part */
1029     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1030     /* send it on its way */
1031     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1032     /* values actually were received in the Begin() but we need to call this nop */
1033     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1034   }
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1040 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1041 {
1042   MPI_Comm       comm;
1043   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1044   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1045   IS             Me,Notme;
1046   PetscErrorCode ierr;
1047   PetscInt       M,N,first,last,*notme,i;
1048   PetscMPIInt    size;
1049 
1050   PetscFunctionBegin;
1051   /* Easy test: symmetric diagonal block */
1052   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1053   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1054   if (!*f) PetscFunctionReturn(0);
1055   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1057   if (size == 1) PetscFunctionReturn(0);
1058 
1059   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1060   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1061   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1062   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1063   for (i=0; i<first; i++) notme[i] = i;
1064   for (i=last; i<M; i++) notme[i-last+first] = i;
1065   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1066   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1067   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1068   Aoff = Aoffs[0];
1069   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1070   Boff = Boffs[0];
1071   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1072   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1073   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1074   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1075   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1076   ierr = PetscFree(notme);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1082 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1083 {
1084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1085   PetscErrorCode ierr;
1086 
1087   PetscFunctionBegin;
1088   /* do nondiagonal part */
1089   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1090   /* send it on its way */
1091   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   /* do local part */
1093   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1094   /* receive remote parts */
1095   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /*
1100   This only works correctly for square matrices where the subblock A->A is the
1101    diagonal block
1102 */
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1105 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1106 {
1107   PetscErrorCode ierr;
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109 
1110   PetscFunctionBegin;
1111   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1112   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1113   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatScale_MPIAIJ"
1119 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1120 {
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1126   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatDestroy_Redundant"
1132 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1133 {
1134   PetscErrorCode ierr;
1135   Mat_Redundant  *redund = *redundant;
1136   PetscInt       i;
1137 
1138   PetscFunctionBegin;
1139   *redundant = NULL;
1140   if (redund){
1141     if (redund->matseq) { /* via MatGetSubMatrices()  */
1142       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1143       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1144       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1145       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1146     } else {
1147       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1148       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1149       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1150       for (i=0; i<redund->nrecvs; i++) {
1151         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1152         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1153       }
1154       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1155     }
1156 
1157     if (redund->psubcomm) {
1158       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1159     }
1160     ierr = PetscFree(redund);CHKERRQ(ierr);
1161   }
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatDestroy_MPIAIJ"
1167 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1168 {
1169   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173 #if defined(PETSC_USE_LOG)
1174   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1175 #endif
1176   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1177   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1178   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1179   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1180   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1181 #if defined(PETSC_USE_CTABLE)
1182   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1183 #else
1184   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1185 #endif
1186   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1187   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1188   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1189   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1190   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1191   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1192 
1193   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1194   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1196   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1198   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1199   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1200   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1201   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1208 {
1209   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1210   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1211   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1212   PetscErrorCode ierr;
1213   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1214   int            fd;
1215   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1216   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1217   PetscScalar    *column_values;
1218   PetscInt       message_count,flowcontrolcount;
1219   FILE           *file;
1220 
1221   PetscFunctionBegin;
1222   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1223   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1224   nz   = A->nz + B->nz;
1225   if (!rank) {
1226     header[0] = MAT_FILE_CLASSID;
1227     header[1] = mat->rmap->N;
1228     header[2] = mat->cmap->N;
1229 
1230     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1232     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     /* get largest number of rows any processor has */
1234     rlen  = mat->rmap->n;
1235     range = mat->rmap->range;
1236     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1237   } else {
1238     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     rlen = mat->rmap->n;
1240   }
1241 
1242   /* load up the local row counts */
1243   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1244   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1245 
1246   /* store the row lengths to the file */
1247   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1248   if (!rank) {
1249     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1250     for (i=1; i<size; i++) {
1251       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1252       rlen = range[i+1] - range[i];
1253       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1255     }
1256     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1257   } else {
1258     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1259     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1261   }
1262   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1263 
1264   /* load up the local column indices */
1265   nzmax = nz; /* th processor needs space a largest processor needs */
1266   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1268   cnt   = 0;
1269   for (i=0; i<mat->rmap->n; i++) {
1270     for (j=B->i[i]; j<B->i[i+1]; j++) {
1271       if ((col = garray[B->j[j]]) > cstart) break;
1272       column_indices[cnt++] = col;
1273     }
1274     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1275     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1276   }
1277   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1278 
1279   /* store the column indices to the file */
1280   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1281   if (!rank) {
1282     MPI_Status status;
1283     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     for (i=1; i<size; i++) {
1285       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1286       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1287       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1288       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1290     }
1291     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1292   } else {
1293     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1294     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1297   }
1298   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1299 
1300   /* load up the local column values */
1301   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1302   cnt  = 0;
1303   for (i=0; i<mat->rmap->n; i++) {
1304     for (j=B->i[i]; j<B->i[i+1]; j++) {
1305       if (garray[B->j[j]] > cstart) break;
1306       column_values[cnt++] = B->a[j];
1307     }
1308     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1309     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1310   }
1311   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1312 
1313   /* store the column values to the file */
1314   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1315   if (!rank) {
1316     MPI_Status status;
1317     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1318     for (i=1; i<size; i++) {
1319       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1320       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1321       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1322       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1323       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1324     }
1325     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1326   } else {
1327     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1328     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1329     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1330     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1331   }
1332   ierr = PetscFree(column_values);CHKERRQ(ierr);
1333 
1334   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1335   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #include <petscdraw.h>
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1342 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1343 {
1344   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1345   PetscErrorCode    ierr;
1346   PetscMPIInt       rank = aij->rank,size = aij->size;
1347   PetscBool         isdraw,iascii,isbinary;
1348   PetscViewer       sviewer;
1349   PetscViewerFormat format;
1350 
1351   PetscFunctionBegin;
1352   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1353   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1354   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1355   if (iascii) {
1356     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1357     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1358       MatInfo   info;
1359       PetscBool inodes;
1360 
1361       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1362       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1363       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1365       if (!inodes) {
1366         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1367                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1368       } else {
1369         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1370                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1371       }
1372       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1374       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1375       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1376       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1377       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1378       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1379       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1380       PetscFunctionReturn(0);
1381     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1382       PetscInt inodecount,inodelimit,*inodes;
1383       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1384       if (inodes) {
1385         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1386       } else {
1387         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1388       }
1389       PetscFunctionReturn(0);
1390     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1391       PetscFunctionReturn(0);
1392     }
1393   } else if (isbinary) {
1394     if (size == 1) {
1395       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1396       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1397     } else {
1398       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1399     }
1400     PetscFunctionReturn(0);
1401   } else if (isdraw) {
1402     PetscDraw draw;
1403     PetscBool isnull;
1404     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1405     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1406   }
1407 
1408   {
1409     /* assemble the entire matrix onto first processor. */
1410     Mat        A;
1411     Mat_SeqAIJ *Aloc;
1412     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1413     MatScalar  *a;
1414 
1415     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1416     if (!rank) {
1417       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1418     } else {
1419       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1420     }
1421     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1422     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1423     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1424     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1425     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1426 
1427     /* copy over the A part */
1428     Aloc = (Mat_SeqAIJ*)aij->A->data;
1429     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1432     for (i=0; i<m; i++) {
1433       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1434       row++;
1435       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1436     }
1437     aj = Aloc->j;
1438     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1439 
1440     /* copy over the B part */
1441     Aloc = (Mat_SeqAIJ*)aij->B->data;
1442     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1443     row  = mat->rmap->rstart;
1444     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1445     ct   = cols;
1446     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1447     for (i=0; i<m; i++) {
1448       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1449       row++;
1450       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1451     }
1452     ierr = PetscFree(ct);CHKERRQ(ierr);
1453     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455     /*
1456        Everyone has to call to draw the matrix since the graphics waits are
1457        synchronized across all processors that share the PetscDraw object
1458     */
1459     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1460     if (!rank) {
1461       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1462     }
1463     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1464     ierr = MatDestroy(&A);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatView_MPIAIJ"
1471 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1472 {
1473   PetscErrorCode ierr;
1474   PetscBool      iascii,isdraw,issocket,isbinary;
1475 
1476   PetscFunctionBegin;
1477   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1478   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1479   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1480   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1481   if (iascii || isdraw || isbinary || issocket) {
1482     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1483   }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatSOR_MPIAIJ"
1489 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1490 {
1491   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1492   PetscErrorCode ierr;
1493   Vec            bb1 = 0;
1494   PetscBool      hasop;
1495 
1496   PetscFunctionBegin;
1497   if (flag == SOR_APPLY_UPPER) {
1498     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1499     PetscFunctionReturn(0);
1500   }
1501 
1502   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1503     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1504   }
1505 
1506   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1507     if (flag & SOR_ZERO_INITIAL_GUESS) {
1508       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1509       its--;
1510     }
1511 
1512     while (its--) {
1513       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515 
1516       /* update rhs: bb1 = bb - B*x */
1517       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1518       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1519 
1520       /* local sweep */
1521       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1522     }
1523   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1524     if (flag & SOR_ZERO_INITIAL_GUESS) {
1525       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1526       its--;
1527     }
1528     while (its--) {
1529       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531 
1532       /* update rhs: bb1 = bb - B*x */
1533       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1534       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1535 
1536       /* local sweep */
1537       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1538     }
1539   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1540     if (flag & SOR_ZERO_INITIAL_GUESS) {
1541       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542       its--;
1543     }
1544     while (its--) {
1545       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1546       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547 
1548       /* update rhs: bb1 = bb - B*x */
1549       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1550       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1551 
1552       /* local sweep */
1553       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1554     }
1555   } else if (flag & SOR_EISENSTAT) {
1556     Vec xx1;
1557 
1558     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1559     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1560 
1561     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1562     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563     if (!mat->diag) {
1564       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1565       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1566     }
1567     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1568     if (hasop) {
1569       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1570     } else {
1571       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1572     }
1573     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1574 
1575     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1576 
1577     /* local sweep */
1578     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1579     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1580     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1581   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1582 
1583   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatPermute_MPIAIJ"
1589 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1590 {
1591   Mat            aA,aB,Aperm;
1592   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1593   PetscScalar    *aa,*ba;
1594   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1595   PetscSF        rowsf,sf;
1596   IS             parcolp = NULL;
1597   PetscBool      done;
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1602   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1603   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1604   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1605 
1606   /* Invert row permutation to find out where my rows should go */
1607   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1608   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1609   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1610   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1611   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1613 
1614   /* Invert column permutation to find out where my columns should go */
1615   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1616   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1617   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1618   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1619   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1620   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1621   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1622 
1623   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1624   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1625   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1626 
1627   /* Find out where my gcols should go */
1628   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1629   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1630   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1631   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1632   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1633   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1634   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1635   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1636 
1637   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1638   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1639   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1640   for (i=0; i<m; i++) {
1641     PetscInt row = rdest[i],rowner;
1642     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1643     for (j=ai[i]; j<ai[i+1]; j++) {
1644       PetscInt cowner,col = cdest[aj[j]];
1645       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1646       if (rowner == cowner) dnnz[i]++;
1647       else onnz[i]++;
1648     }
1649     for (j=bi[i]; j<bi[i+1]; j++) {
1650       PetscInt cowner,col = gcdest[bj[j]];
1651       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1652       if (rowner == cowner) dnnz[i]++;
1653       else onnz[i]++;
1654     }
1655   }
1656   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1657   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1658   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1659   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1660   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1661 
1662   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1663   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1664   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1665   for (i=0; i<m; i++) {
1666     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1667     PetscInt j0,rowlen;
1668     rowlen = ai[i+1] - ai[i];
1669     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1670       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1671       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1672     }
1673     rowlen = bi[i+1] - bi[i];
1674     for (j0=j=0; j<rowlen; j0=j) {
1675       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1676       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1677     }
1678   }
1679   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1684   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1685   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1686   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1687   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1688   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1689   *B = Aperm;
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1695 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1696 {
1697   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1698   Mat            A    = mat->A,B = mat->B;
1699   PetscErrorCode ierr;
1700   PetscReal      isend[5],irecv[5];
1701 
1702   PetscFunctionBegin;
1703   info->block_size = 1.0;
1704   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1705 
1706   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1707   isend[3] = info->memory;  isend[4] = info->mallocs;
1708 
1709   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1710 
1711   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1712   isend[3] += info->memory;  isend[4] += info->mallocs;
1713   if (flag == MAT_LOCAL) {
1714     info->nz_used      = isend[0];
1715     info->nz_allocated = isend[1];
1716     info->nz_unneeded  = isend[2];
1717     info->memory       = isend[3];
1718     info->mallocs      = isend[4];
1719   } else if (flag == MAT_GLOBAL_MAX) {
1720     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1721 
1722     info->nz_used      = irecv[0];
1723     info->nz_allocated = irecv[1];
1724     info->nz_unneeded  = irecv[2];
1725     info->memory       = irecv[3];
1726     info->mallocs      = irecv[4];
1727   } else if (flag == MAT_GLOBAL_SUM) {
1728     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1729 
1730     info->nz_used      = irecv[0];
1731     info->nz_allocated = irecv[1];
1732     info->nz_unneeded  = irecv[2];
1733     info->memory       = irecv[3];
1734     info->mallocs      = irecv[4];
1735   }
1736   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1737   info->fill_ratio_needed = 0;
1738   info->factor_mallocs    = 0;
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetOption_MPIAIJ"
1744 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1745 {
1746   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   switch (op) {
1751   case MAT_NEW_NONZERO_LOCATIONS:
1752   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1753   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1754   case MAT_KEEP_NONZERO_PATTERN:
1755   case MAT_NEW_NONZERO_LOCATION_ERR:
1756   case MAT_USE_INODES:
1757   case MAT_IGNORE_ZERO_ENTRIES:
1758     MatCheckPreallocated(A,1);
1759     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1760     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_ROW_ORIENTED:
1763     a->roworiented = flg;
1764 
1765     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1766     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1767     break;
1768   case MAT_NEW_DIAGONALS:
1769     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1770     break;
1771   case MAT_IGNORE_OFF_PROC_ENTRIES:
1772     a->donotstash = flg;
1773     break;
1774   case MAT_SPD:
1775     A->spd_set = PETSC_TRUE;
1776     A->spd     = flg;
1777     if (flg) {
1778       A->symmetric                  = PETSC_TRUE;
1779       A->structurally_symmetric     = PETSC_TRUE;
1780       A->symmetric_set              = PETSC_TRUE;
1781       A->structurally_symmetric_set = PETSC_TRUE;
1782     }
1783     break;
1784   case MAT_SYMMETRIC:
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     break;
1787   case MAT_STRUCTURALLY_SYMMETRIC:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_HERMITIAN:
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_SYMMETRY_ETERNAL:
1794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1795     break;
1796   default:
1797     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1798   }
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatGetRow_MPIAIJ"
1804 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1805 {
1806   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1807   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1808   PetscErrorCode ierr;
1809   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1810   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1811   PetscInt       *cmap,*idx_p;
1812 
1813   PetscFunctionBegin;
1814   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1815   mat->getrowactive = PETSC_TRUE;
1816 
1817   if (!mat->rowvalues && (idx || v)) {
1818     /*
1819         allocate enough space to hold information from the longest row.
1820     */
1821     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1822     PetscInt   max = 1,tmp;
1823     for (i=0; i<matin->rmap->n; i++) {
1824       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1825       if (max < tmp) max = tmp;
1826     }
1827     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1828   }
1829 
1830   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1831   lrow = row - rstart;
1832 
1833   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1834   if (!v)   {pvA = 0; pvB = 0;}
1835   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1836   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1837   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1838   nztot = nzA + nzB;
1839 
1840   cmap = mat->garray;
1841   if (v  || idx) {
1842     if (nztot) {
1843       /* Sort by increasing column numbers, assuming A and B already sorted */
1844       PetscInt imark = -1;
1845       if (v) {
1846         *v = v_p = mat->rowvalues;
1847         for (i=0; i<nzB; i++) {
1848           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1849           else break;
1850         }
1851         imark = i;
1852         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1853         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1854       }
1855       if (idx) {
1856         *idx = idx_p = mat->rowindices;
1857         if (imark > -1) {
1858           for (i=0; i<imark; i++) {
1859             idx_p[i] = cmap[cworkB[i]];
1860           }
1861         } else {
1862           for (i=0; i<nzB; i++) {
1863             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1864             else break;
1865           }
1866           imark = i;
1867         }
1868         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1869         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1870       }
1871     } else {
1872       if (idx) *idx = 0;
1873       if (v)   *v   = 0;
1874     }
1875   }
1876   *nz  = nztot;
1877   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1884 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1885 {
1886   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1887 
1888   PetscFunctionBegin;
1889   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1890   aij->getrowactive = PETSC_FALSE;
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatNorm_MPIAIJ"
1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1897 {
1898   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1899   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1900   PetscErrorCode ierr;
1901   PetscInt       i,j,cstart = mat->cmap->rstart;
1902   PetscReal      sum = 0.0;
1903   MatScalar      *v;
1904 
1905   PetscFunctionBegin;
1906   if (aij->size == 1) {
1907     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1908   } else {
1909     if (type == NORM_FROBENIUS) {
1910       v = amat->a;
1911       for (i=0; i<amat->nz; i++) {
1912         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1913       }
1914       v = bmat->a;
1915       for (i=0; i<bmat->nz; i++) {
1916         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1917       }
1918       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1919       *norm = PetscSqrtReal(*norm);
1920     } else if (type == NORM_1) { /* max column norm */
1921       PetscReal *tmp,*tmp2;
1922       PetscInt  *jj,*garray = aij->garray;
1923       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1924       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1925       *norm = 0.0;
1926       v     = amat->a; jj = amat->j;
1927       for (j=0; j<amat->nz; j++) {
1928         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1929       }
1930       v = bmat->a; jj = bmat->j;
1931       for (j=0; j<bmat->nz; j++) {
1932         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1933       }
1934       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1935       for (j=0; j<mat->cmap->N; j++) {
1936         if (tmp2[j] > *norm) *norm = tmp2[j];
1937       }
1938       ierr = PetscFree(tmp);CHKERRQ(ierr);
1939       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1940     } else if (type == NORM_INFINITY) { /* max row norm */
1941       PetscReal ntemp = 0.0;
1942       for (j=0; j<aij->A->rmap->n; j++) {
1943         v   = amat->a + amat->i[j];
1944         sum = 0.0;
1945         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1946           sum += PetscAbsScalar(*v); v++;
1947         }
1948         v = bmat->a + bmat->i[j];
1949         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1950           sum += PetscAbsScalar(*v); v++;
1951         }
1952         if (sum > ntemp) ntemp = sum;
1953       }
1954       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1955     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatTranspose_MPIAIJ"
1962 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1963 {
1964   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1965   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1966   PetscErrorCode ierr;
1967   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1968   PetscInt       cstart = A->cmap->rstart,ncol;
1969   Mat            B;
1970   MatScalar      *array;
1971 
1972   PetscFunctionBegin;
1973   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1974 
1975   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1976   ai = Aloc->i; aj = Aloc->j;
1977   bi = Bloc->i; bj = Bloc->j;
1978   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1979     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1980     PetscSFNode          *oloc;
1981     PETSC_UNUSED PetscSF sf;
1982 
1983     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1984     /* compute d_nnz for preallocation */
1985     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1986     for (i=0; i<ai[ma]; i++) {
1987       d_nnz[aj[i]]++;
1988       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1989     }
1990     /* compute local off-diagonal contributions */
1991     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1992     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1993     /* map those to global */
1994     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1995     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1996     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1997     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1998     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1999     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2000     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2001 
2002     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2003     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2004     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2005     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2006     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2007     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2008   } else {
2009     B    = *matout;
2010     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2011     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2012   }
2013 
2014   /* copy over the A part */
2015   array = Aloc->a;
2016   row   = A->rmap->rstart;
2017   for (i=0; i<ma; i++) {
2018     ncol = ai[i+1]-ai[i];
2019     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2020     row++;
2021     array += ncol; aj += ncol;
2022   }
2023   aj = Aloc->j;
2024   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2025 
2026   /* copy over the B part */
2027   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2028   array = Bloc->a;
2029   row   = A->rmap->rstart;
2030   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2031   cols_tmp = cols;
2032   for (i=0; i<mb; i++) {
2033     ncol = bi[i+1]-bi[i];
2034     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2035     row++;
2036     array += ncol; cols_tmp += ncol;
2037   }
2038   ierr = PetscFree(cols);CHKERRQ(ierr);
2039 
2040   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2041   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2042   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2043     *matout = B;
2044   } else {
2045     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2046   }
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 #undef __FUNCT__
2051 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2052 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2053 {
2054   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2055   Mat            a    = aij->A,b = aij->B;
2056   PetscErrorCode ierr;
2057   PetscInt       s1,s2,s3;
2058 
2059   PetscFunctionBegin;
2060   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2061   if (rr) {
2062     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2063     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2064     /* Overlap communication with computation. */
2065     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2066   }
2067   if (ll) {
2068     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2069     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2070     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2071   }
2072   /* scale  the diagonal block */
2073   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2074 
2075   if (rr) {
2076     /* Do a scatter end and then right scale the off-diagonal block */
2077     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2078     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2079   }
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2085 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2086 {
2087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2088   PetscErrorCode ierr;
2089 
2090   PetscFunctionBegin;
2091   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatEqual_MPIAIJ"
2097 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2098 {
2099   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2100   Mat            a,b,c,d;
2101   PetscBool      flg;
2102   PetscErrorCode ierr;
2103 
2104   PetscFunctionBegin;
2105   a = matA->A; b = matA->B;
2106   c = matB->A; d = matB->B;
2107 
2108   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2109   if (flg) {
2110     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2111   }
2112   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatCopy_MPIAIJ"
2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2122   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2123 
2124   PetscFunctionBegin;
2125   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2126   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2127     /* because of the column compression in the off-processor part of the matrix a->B,
2128        the number of columns in a->B and b->B may be different, hence we cannot call
2129        the MatCopy() directly on the two parts. If need be, we can provide a more
2130        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2131        then copying the submatrices */
2132     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2133   } else {
2134     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2135     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2136   }
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatSetUp_MPIAIJ"
2142 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2143 {
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2153 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2154 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2155 {
2156   PetscInt       i,m=Y->rmap->N;
2157   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2158   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2159   const PetscInt *xi = x->i,*yi = y->i;
2160 
2161   PetscFunctionBegin;
2162   /* Set the number of nonzeros in the new matrix */
2163   for (i=0; i<m; i++) {
2164     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2165     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2166     nnz[i] = 0;
2167     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2168       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2169       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2170       nnz[i]++;
2171     }
2172     for (; k<nzy; k++) nnz[i]++;
2173   }
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatAXPY_MPIAIJ"
2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2180 {
2181   PetscErrorCode ierr;
2182   PetscInt       i;
2183   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2184   PetscBLASInt   bnz,one=1;
2185   Mat_SeqAIJ     *x,*y;
2186 
2187   PetscFunctionBegin;
2188   if (str == SAME_NONZERO_PATTERN) {
2189     PetscScalar alpha = a;
2190     x    = (Mat_SeqAIJ*)xx->A->data;
2191     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2192     y    = (Mat_SeqAIJ*)yy->A->data;
2193     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2194     x    = (Mat_SeqAIJ*)xx->B->data;
2195     y    = (Mat_SeqAIJ*)yy->B->data;
2196     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2197     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2198     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2199   } else if (str == SUBSET_NONZERO_PATTERN) {
2200     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2201 
2202     x = (Mat_SeqAIJ*)xx->B->data;
2203     y = (Mat_SeqAIJ*)yy->B->data;
2204     if (y->xtoy && y->XtoY != xx->B) {
2205       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2206       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2207     }
2208     if (!y->xtoy) { /* get xtoy */
2209       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2210       y->XtoY = xx->B;
2211       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2212     }
2213     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2214     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2215   } else {
2216     Mat      B;
2217     PetscInt *nnz_d,*nnz_o;
2218     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2219     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2220     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2221     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2222     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2223     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2224     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2225     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2226     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2227     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2228     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2229     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2230     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2231     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatConjugate_MPIAIJ"
2240 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2241 {
2242 #if defined(PETSC_USE_COMPLEX)
2243   PetscErrorCode ierr;
2244   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2245 
2246   PetscFunctionBegin;
2247   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2248   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2249 #else
2250   PetscFunctionBegin;
2251 #endif
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatRealPart_MPIAIJ"
2257 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2258 {
2259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2264   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2270 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2271 {
2272   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2273   PetscErrorCode ierr;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2277   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #if defined(PETSC_HAVE_PBGL)
2282 
2283 #include <boost/parallel/mpi/bsp_process_group.hpp>
2284 #include <boost/graph/distributed/ilu_default_graph.hpp>
2285 #include <boost/graph/distributed/ilu_0_block.hpp>
2286 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2287 #include <boost/graph/distributed/petsc/interface.hpp>
2288 #include <boost/multi_array.hpp>
2289 #include <boost/parallel/distributed_property_map->hpp>
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2293 /*
2294   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2295 */
2296 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2297 {
2298   namespace petsc = boost::distributed::petsc;
2299 
2300   namespace graph_dist = boost::graph::distributed;
2301   using boost::graph::distributed::ilu_default::process_group_type;
2302   using boost::graph::ilu_permuted;
2303 
2304   PetscBool      row_identity, col_identity;
2305   PetscContainer c;
2306   PetscInt       m, n, M, N;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2311   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2312   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2313   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2314 
2315   process_group_type pg;
2316   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2317   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2318   lgraph_type& level_graph = *lgraph_p;
2319   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2320 
2321   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2322   ilu_permuted(level_graph);
2323 
2324   /* put together the new matrix */
2325   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2326   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2327   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2328   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2329   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2330   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2331   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2332   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2333 
2334   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2335   ierr = PetscContainerSetPointer(c, lgraph_p);
2336   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2337   ierr = PetscContainerDestroy(&c);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2343 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2344 {
2345   PetscFunctionBegin;
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatSolve_MPIAIJ"
2351 /*
2352   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2353 */
2354 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2355 {
2356   namespace graph_dist = boost::graph::distributed;
2357 
2358   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2359   lgraph_type    *lgraph_p;
2360   PetscContainer c;
2361   PetscErrorCode ierr;
2362 
2363   PetscFunctionBegin;
2364   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2365   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2366   ierr = VecCopy(b, x);CHKERRQ(ierr);
2367 
2368   PetscScalar *array_x;
2369   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2370   PetscInt sx;
2371   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2372 
2373   PetscScalar *array_b;
2374   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2375   PetscInt sb;
2376   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2377 
2378   lgraph_type& level_graph = *lgraph_p;
2379   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2380 
2381   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2382   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2383   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2384 
2385   typedef boost::iterator_property_map<array_ref_type::iterator,
2386                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2387   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2388   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2389 
2390   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2391   PetscFunctionReturn(0);
2392 }
2393 #endif
2394 
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2398 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2399 {
2400   PetscMPIInt    rank,size;
2401   MPI_Comm       comm;
2402   PetscErrorCode ierr;
2403   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2404   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2405   PetscInt       *rowrange = mat->rmap->range;
2406   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2407   Mat            A = aij->A,B=aij->B,C=*matredundant;
2408   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2409   PetscScalar    *sbuf_a;
2410   PetscInt       nzlocal=a->nz+b->nz;
2411   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2412   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2413   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2414   MatScalar      *aworkA,*aworkB;
2415   PetscScalar    *vals;
2416   PetscMPIInt    tag1,tag2,tag3,imdex;
2417   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2418   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2419   MPI_Status     recv_status,*send_status;
2420   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2421   PetscInt       **rbuf_j=NULL;
2422   PetscScalar    **rbuf_a=NULL;
2423   Mat_Redundant  *redund =NULL;
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2427   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2428   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2429   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2430   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2431 
2432   if (reuse == MAT_REUSE_MATRIX) {
2433     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2434     if (subsize == 1) {
2435       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2436       redund = c->redundant;
2437     } else {
2438       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2439       redund = c->redundant;
2440     }
2441     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2442 
2443     nsends    = redund->nsends;
2444     nrecvs    = redund->nrecvs;
2445     send_rank = redund->send_rank;
2446     recv_rank = redund->recv_rank;
2447     sbuf_nz   = redund->sbuf_nz;
2448     rbuf_nz   = redund->rbuf_nz;
2449     sbuf_j    = redund->sbuf_j;
2450     sbuf_a    = redund->sbuf_a;
2451     rbuf_j    = redund->rbuf_j;
2452     rbuf_a    = redund->rbuf_a;
2453   }
2454 
2455   if (reuse == MAT_INITIAL_MATRIX) {
2456     PetscInt    nleftover,np_subcomm;
2457 
2458     /* get the destination processors' id send_rank, nsends and nrecvs */
2459     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2460 
2461     np_subcomm = size/nsubcomm;
2462     nleftover  = size - nsubcomm*np_subcomm;
2463 
2464     /* block of codes below is specific for INTERLACED */
2465     /* ------------------------------------------------*/
2466     nsends = 0; nrecvs = 0;
2467     for (i=0; i<size; i++) {
2468       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2469         send_rank[nsends++] = i;
2470         recv_rank[nrecvs++] = i;
2471       }
2472     }
2473     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2474       i = size-nleftover-1;
2475       j = 0;
2476       while (j < nsubcomm - nleftover) {
2477         send_rank[nsends++] = i;
2478         i--; j++;
2479       }
2480     }
2481 
2482     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2483       for (i=0; i<nleftover; i++) {
2484         recv_rank[nrecvs++] = size-nleftover+i;
2485       }
2486     }
2487     /*----------------------------------------------*/
2488 
2489     /* allocate sbuf_j, sbuf_a */
2490     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2491     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2492     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2493     /*
2494     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2495     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2496      */
2497   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2498 
2499   /* copy mat's local entries into the buffers */
2500   if (reuse == MAT_INITIAL_MATRIX) {
2501     rownz_max = 0;
2502     rptr      = sbuf_j;
2503     cols      = sbuf_j + rend-rstart + 1;
2504     vals      = sbuf_a;
2505     rptr[0]   = 0;
2506     for (i=0; i<rend-rstart; i++) {
2507       row    = i + rstart;
2508       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2509       ncols  = nzA + nzB;
2510       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2511       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2512       /* load the column indices for this row into cols */
2513       lwrite = 0;
2514       for (l=0; l<nzB; l++) {
2515         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2516           vals[lwrite]   = aworkB[l];
2517           cols[lwrite++] = ctmp;
2518         }
2519       }
2520       for (l=0; l<nzA; l++) {
2521         vals[lwrite]   = aworkA[l];
2522         cols[lwrite++] = cstart + cworkA[l];
2523       }
2524       for (l=0; l<nzB; l++) {
2525         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2526           vals[lwrite]   = aworkB[l];
2527           cols[lwrite++] = ctmp;
2528         }
2529       }
2530       vals     += ncols;
2531       cols     += ncols;
2532       rptr[i+1] = rptr[i] + ncols;
2533       if (rownz_max < ncols) rownz_max = ncols;
2534     }
2535     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2536   } else { /* only copy matrix values into sbuf_a */
2537     rptr    = sbuf_j;
2538     vals    = sbuf_a;
2539     rptr[0] = 0;
2540     for (i=0; i<rend-rstart; i++) {
2541       row    = i + rstart;
2542       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2543       ncols  = nzA + nzB;
2544       cworkB = b->j + b->i[i];
2545       aworkA = a->a + a->i[i];
2546       aworkB = b->a + b->i[i];
2547       lwrite = 0;
2548       for (l=0; l<nzB; l++) {
2549         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2550       }
2551       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2552       for (l=0; l<nzB; l++) {
2553         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2554       }
2555       vals     += ncols;
2556       rptr[i+1] = rptr[i] + ncols;
2557     }
2558   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2559 
2560   /* send nzlocal to others, and recv other's nzlocal */
2561   /*--------------------------------------------------*/
2562   if (reuse == MAT_INITIAL_MATRIX) {
2563     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2564 
2565     s_waits2 = s_waits3 + nsends;
2566     s_waits1 = s_waits2 + nsends;
2567     r_waits1 = s_waits1 + nsends;
2568     r_waits2 = r_waits1 + nrecvs;
2569     r_waits3 = r_waits2 + nrecvs;
2570   } else {
2571     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2572 
2573     r_waits3 = s_waits3 + nsends;
2574   }
2575 
2576   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2577   if (reuse == MAT_INITIAL_MATRIX) {
2578     /* get new tags to keep the communication clean */
2579     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2580     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2581     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2582 
2583     /* post receives of other's nzlocal */
2584     for (i=0; i<nrecvs; i++) {
2585       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2586     }
2587     /* send nzlocal to others */
2588     for (i=0; i<nsends; i++) {
2589       sbuf_nz[i] = nzlocal;
2590       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2591     }
2592     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2593     count = nrecvs;
2594     while (count) {
2595       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2596 
2597       recv_rank[imdex] = recv_status.MPI_SOURCE;
2598       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2599       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2600 
2601       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2602 
2603       rbuf_nz[imdex] += i + 2;
2604 
2605       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2606       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2607       count--;
2608     }
2609     /* wait on sends of nzlocal */
2610     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2611     /* send mat->i,j to others, and recv from other's */
2612     /*------------------------------------------------*/
2613     for (i=0; i<nsends; i++) {
2614       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2615       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2616     }
2617     /* wait on receives of mat->i,j */
2618     /*------------------------------*/
2619     count = nrecvs;
2620     while (count) {
2621       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2622       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2623       count--;
2624     }
2625     /* wait on sends of mat->i,j */
2626     /*---------------------------*/
2627     if (nsends) {
2628       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2629     }
2630   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2631 
2632   /* post receives, send and receive mat->a */
2633   /*----------------------------------------*/
2634   for (imdex=0; imdex<nrecvs; imdex++) {
2635     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2636   }
2637   for (i=0; i<nsends; i++) {
2638     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2639   }
2640   count = nrecvs;
2641   while (count) {
2642     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2643     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2644     count--;
2645   }
2646   if (nsends) {
2647     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2648   }
2649 
2650   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2651 
2652   /* create redundant matrix */
2653   /*-------------------------*/
2654   if (reuse == MAT_INITIAL_MATRIX) {
2655     const PetscInt *range;
2656     PetscInt       rstart_sub,rend_sub,mloc_sub;
2657 
2658     /* compute rownz_max for preallocation */
2659     for (imdex=0; imdex<nrecvs; imdex++) {
2660       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2661       rptr = rbuf_j[imdex];
2662       for (i=0; i<j; i++) {
2663         ncols = rptr[i+1] - rptr[i];
2664         if (rownz_max < ncols) rownz_max = ncols;
2665       }
2666     }
2667 
2668     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2669 
2670     /* get local size of redundant matrix
2671        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2672     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2673     rstart_sub = range[nsubcomm*subrank];
2674     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2675       rend_sub = range[nsubcomm*(subrank+1)];
2676     } else {
2677       rend_sub = mat->rmap->N;
2678     }
2679     mloc_sub = rend_sub - rstart_sub;
2680 
2681     if (M == N) {
2682       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2683     } else { /* non-square matrix */
2684       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2685     }
2686     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2687     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2688     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2689     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2690   } else {
2691     C = *matredundant;
2692   }
2693 
2694   /* insert local matrix entries */
2695   rptr = sbuf_j;
2696   cols = sbuf_j + rend-rstart + 1;
2697   vals = sbuf_a;
2698   for (i=0; i<rend-rstart; i++) {
2699     row   = i + rstart;
2700     ncols = rptr[i+1] - rptr[i];
2701     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2702     vals += ncols;
2703     cols += ncols;
2704   }
2705   /* insert received matrix entries */
2706   for (imdex=0; imdex<nrecvs; imdex++) {
2707     rstart = rowrange[recv_rank[imdex]];
2708     rend   = rowrange[recv_rank[imdex]+1];
2709     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2710     rptr   = rbuf_j[imdex];
2711     cols   = rbuf_j[imdex] + rend-rstart + 1;
2712     vals   = rbuf_a[imdex];
2713     for (i=0; i<rend-rstart; i++) {
2714       row   = i + rstart;
2715       ncols = rptr[i+1] - rptr[i];
2716       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2717       vals += ncols;
2718       cols += ncols;
2719     }
2720   }
2721   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2722   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2723 
2724   if (reuse == MAT_INITIAL_MATRIX) {
2725     *matredundant = C;
2726 
2727     /* create a supporting struct and attach it to C for reuse */
2728     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2729     if (subsize == 1) {
2730       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2731       c->redundant = redund;
2732     } else {
2733       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2734       c->redundant = redund;
2735     }
2736 
2737     redund->nzlocal   = nzlocal;
2738     redund->nsends    = nsends;
2739     redund->nrecvs    = nrecvs;
2740     redund->send_rank = send_rank;
2741     redund->recv_rank = recv_rank;
2742     redund->sbuf_nz   = sbuf_nz;
2743     redund->rbuf_nz   = rbuf_nz;
2744     redund->sbuf_j    = sbuf_j;
2745     redund->sbuf_a    = sbuf_a;
2746     redund->rbuf_j    = rbuf_j;
2747     redund->rbuf_a    = rbuf_a;
2748     redund->psubcomm  = NULL;
2749   }
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2755 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2756 {
2757   PetscErrorCode ierr;
2758   MPI_Comm       comm;
2759   PetscMPIInt    size,subsize;
2760   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2761   Mat_Redundant  *redund=NULL;
2762   PetscSubcomm   psubcomm=NULL;
2763   MPI_Comm       subcomm_in=subcomm;
2764   Mat            *matseq;
2765   IS             isrow,iscol;
2766 
2767   PetscFunctionBegin;
2768   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2769     if (reuse ==  MAT_INITIAL_MATRIX) {
2770       /* create psubcomm, then get subcomm */
2771       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2772       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2773       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2774 
2775       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2776       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2777       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2778       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2779       subcomm = psubcomm->comm;
2780     } else { /* retrieve psubcomm and subcomm */
2781       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2782       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2783       if (subsize == 1) {
2784         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2785         redund = c->redundant;
2786       } else {
2787         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2788         redund = c->redundant;
2789       }
2790       psubcomm = redund->psubcomm;
2791     }
2792     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2793       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2794       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2795         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2796         if (subsize == 1) {
2797           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2798           c->redundant->psubcomm = psubcomm;
2799         } else {
2800           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2801           c->redundant->psubcomm = psubcomm ;
2802         }
2803       }
2804       PetscFunctionReturn(0);
2805     }
2806   }
2807 
2808   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2809   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2810   if (reuse == MAT_INITIAL_MATRIX) {
2811     /* create a local sequential matrix matseq[0] */
2812     mloc_sub = PETSC_DECIDE;
2813     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2814     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2815     rstart = rend - mloc_sub;
2816     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2817     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2818   } else { /* reuse == MAT_REUSE_MATRIX */
2819     if (subsize == 1) {
2820       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2821       redund = c->redundant;
2822     } else {
2823       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2824       redund = c->redundant;
2825     }
2826 
2827     isrow  = redund->isrow;
2828     iscol  = redund->iscol;
2829     matseq = redund->matseq;
2830   }
2831   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2832   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2833 
2834   if (reuse == MAT_INITIAL_MATRIX) {
2835     /* create a supporting struct and attach it to C for reuse */
2836     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2837     if (subsize == 1) {
2838       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2839       c->redundant = redund;
2840     } else {
2841       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2842       c->redundant = redund;
2843     }
2844     redund->isrow    = isrow;
2845     redund->iscol    = iscol;
2846     redund->matseq   = matseq;
2847     redund->psubcomm = psubcomm;
2848   }
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2854 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2855 {
2856   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2857   PetscErrorCode ierr;
2858   PetscInt       i,*idxb = 0;
2859   PetscScalar    *va,*vb;
2860   Vec            vtmp;
2861 
2862   PetscFunctionBegin;
2863   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2864   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2865   if (idx) {
2866     for (i=0; i<A->rmap->n; i++) {
2867       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2868     }
2869   }
2870 
2871   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2872   if (idx) {
2873     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2874   }
2875   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2876   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2877 
2878   for (i=0; i<A->rmap->n; i++) {
2879     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2880       va[i] = vb[i];
2881       if (idx) idx[i] = a->garray[idxb[i]];
2882     }
2883   }
2884 
2885   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2886   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2887   ierr = PetscFree(idxb);CHKERRQ(ierr);
2888   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 #undef __FUNCT__
2893 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2894 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2895 {
2896   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2897   PetscErrorCode ierr;
2898   PetscInt       i,*idxb = 0;
2899   PetscScalar    *va,*vb;
2900   Vec            vtmp;
2901 
2902   PetscFunctionBegin;
2903   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2904   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2905   if (idx) {
2906     for (i=0; i<A->cmap->n; i++) {
2907       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2908     }
2909   }
2910 
2911   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2912   if (idx) {
2913     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2914   }
2915   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2916   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2917 
2918   for (i=0; i<A->rmap->n; i++) {
2919     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2920       va[i] = vb[i];
2921       if (idx) idx[i] = a->garray[idxb[i]];
2922     }
2923   }
2924 
2925   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2926   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2927   ierr = PetscFree(idxb);CHKERRQ(ierr);
2928   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2929   PetscFunctionReturn(0);
2930 }
2931 
2932 #undef __FUNCT__
2933 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2934 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2935 {
2936   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2937   PetscInt       n      = A->rmap->n;
2938   PetscInt       cstart = A->cmap->rstart;
2939   PetscInt       *cmap  = mat->garray;
2940   PetscInt       *diagIdx, *offdiagIdx;
2941   Vec            diagV, offdiagV;
2942   PetscScalar    *a, *diagA, *offdiagA;
2943   PetscInt       r;
2944   PetscErrorCode ierr;
2945 
2946   PetscFunctionBegin;
2947   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2948   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2949   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2950   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2951   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2952   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2953   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2954   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2955   for (r = 0; r < n; ++r) {
2956     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2957       a[r]   = diagA[r];
2958       idx[r] = cstart + diagIdx[r];
2959     } else {
2960       a[r]   = offdiagA[r];
2961       idx[r] = cmap[offdiagIdx[r]];
2962     }
2963   }
2964   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2965   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2966   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2967   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2968   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2969   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2975 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2976 {
2977   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2978   PetscInt       n      = A->rmap->n;
2979   PetscInt       cstart = A->cmap->rstart;
2980   PetscInt       *cmap  = mat->garray;
2981   PetscInt       *diagIdx, *offdiagIdx;
2982   Vec            diagV, offdiagV;
2983   PetscScalar    *a, *diagA, *offdiagA;
2984   PetscInt       r;
2985   PetscErrorCode ierr;
2986 
2987   PetscFunctionBegin;
2988   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2990   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2991   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2992   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2993   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   for (r = 0; r < n; ++r) {
2997     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2998       a[r]   = diagA[r];
2999       idx[r] = cstart + diagIdx[r];
3000     } else {
3001       a[r]   = offdiagA[r];
3002       idx[r] = cmap[offdiagIdx[r]];
3003     }
3004   }
3005   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3007   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3008   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3009   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3010   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3016 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3017 {
3018   PetscErrorCode ierr;
3019   Mat            *dummy;
3020 
3021   PetscFunctionBegin;
3022   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3023   *newmat = *dummy;
3024   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 #undef __FUNCT__
3029 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3030 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3031 {
3032   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3033   PetscErrorCode ierr;
3034 
3035   PetscFunctionBegin;
3036   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3042 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3043 {
3044   PetscErrorCode ierr;
3045   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3046 
3047   PetscFunctionBegin;
3048   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3049   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3050   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3051   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 /* -------------------------------------------------------------------*/
3056 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3057                                        MatGetRow_MPIAIJ,
3058                                        MatRestoreRow_MPIAIJ,
3059                                        MatMult_MPIAIJ,
3060                                 /* 4*/ MatMultAdd_MPIAIJ,
3061                                        MatMultTranspose_MPIAIJ,
3062                                        MatMultTransposeAdd_MPIAIJ,
3063 #if defined(PETSC_HAVE_PBGL)
3064                                        MatSolve_MPIAIJ,
3065 #else
3066                                        0,
3067 #endif
3068                                        0,
3069                                        0,
3070                                 /*10*/ 0,
3071                                        0,
3072                                        0,
3073                                        MatSOR_MPIAIJ,
3074                                        MatTranspose_MPIAIJ,
3075                                 /*15*/ MatGetInfo_MPIAIJ,
3076                                        MatEqual_MPIAIJ,
3077                                        MatGetDiagonal_MPIAIJ,
3078                                        MatDiagonalScale_MPIAIJ,
3079                                        MatNorm_MPIAIJ,
3080                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3081                                        MatAssemblyEnd_MPIAIJ,
3082                                        MatSetOption_MPIAIJ,
3083                                        MatZeroEntries_MPIAIJ,
3084                                 /*24*/ MatZeroRows_MPIAIJ,
3085                                        0,
3086 #if defined(PETSC_HAVE_PBGL)
3087                                        0,
3088 #else
3089                                        0,
3090 #endif
3091                                        0,
3092                                        0,
3093                                 /*29*/ MatSetUp_MPIAIJ,
3094 #if defined(PETSC_HAVE_PBGL)
3095                                        0,
3096 #else
3097                                        0,
3098 #endif
3099                                        0,
3100                                        0,
3101                                        0,
3102                                 /*34*/ MatDuplicate_MPIAIJ,
3103                                        0,
3104                                        0,
3105                                        0,
3106                                        0,
3107                                 /*39*/ MatAXPY_MPIAIJ,
3108                                        MatGetSubMatrices_MPIAIJ,
3109                                        MatIncreaseOverlap_MPIAIJ,
3110                                        MatGetValues_MPIAIJ,
3111                                        MatCopy_MPIAIJ,
3112                                 /*44*/ MatGetRowMax_MPIAIJ,
3113                                        MatScale_MPIAIJ,
3114                                        0,
3115                                        MatDiagonalSet_MPIAIJ,
3116                                        MatZeroRowsColumns_MPIAIJ,
3117                                 /*49*/ MatSetRandom_MPIAIJ,
3118                                        0,
3119                                        0,
3120                                        0,
3121                                        0,
3122                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3123                                        0,
3124                                        MatSetUnfactored_MPIAIJ,
3125                                        MatPermute_MPIAIJ,
3126                                        0,
3127                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3128                                        MatDestroy_MPIAIJ,
3129                                        MatView_MPIAIJ,
3130                                        0,
3131                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3132                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3133                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3134                                        0,
3135                                        0,
3136                                        0,
3137                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3138                                        MatGetRowMinAbs_MPIAIJ,
3139                                        0,
3140                                        MatSetColoring_MPIAIJ,
3141                                        0,
3142                                        MatSetValuesAdifor_MPIAIJ,
3143                                 /*75*/ MatFDColoringApply_AIJ,
3144                                        0,
3145                                        0,
3146                                        0,
3147                                        MatFindZeroDiagonals_MPIAIJ,
3148                                 /*80*/ 0,
3149                                        0,
3150                                        0,
3151                                 /*83*/ MatLoad_MPIAIJ,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                        0,
3156                                        0,
3157                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3158                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3159                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3160                                        MatPtAP_MPIAIJ_MPIAIJ,
3161                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3162                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3163                                        0,
3164                                        0,
3165                                        0,
3166                                        0,
3167                                 /*99*/ 0,
3168                                        0,
3169                                        0,
3170                                        MatConjugate_MPIAIJ,
3171                                        0,
3172                                 /*104*/MatSetValuesRow_MPIAIJ,
3173                                        MatRealPart_MPIAIJ,
3174                                        MatImaginaryPart_MPIAIJ,
3175                                        0,
3176                                        0,
3177                                 /*109*/0,
3178                                        MatGetRedundantMatrix_MPIAIJ,
3179                                        MatGetRowMin_MPIAIJ,
3180                                        0,
3181                                        0,
3182                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3183                                        0,
3184                                        0,
3185                                        0,
3186                                        0,
3187                                 /*119*/0,
3188                                        0,
3189                                        0,
3190                                        0,
3191                                        MatGetMultiProcBlock_MPIAIJ,
3192                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3193                                        MatGetColumnNorms_MPIAIJ,
3194                                        MatInvertBlockDiagonal_MPIAIJ,
3195                                        0,
3196                                        MatGetSubMatricesParallel_MPIAIJ,
3197                                 /*129*/0,
3198                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3199                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3200                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3201                                        0,
3202                                 /*134*/0,
3203                                        0,
3204                                        0,
3205                                        0,
3206                                        0,
3207                                 /*139*/0,
3208                                        0,
3209                                        0,
3210                                        MatFDColoringSetUp_MPIXAIJ
3211 };
3212 
3213 /* ----------------------------------------------------------------------------------------*/
3214 
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3217 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3218 {
3219   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3224   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3230 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3231 {
3232   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3237   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 #undef __FUNCT__
3242 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3243 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3244 {
3245   Mat_MPIAIJ     *b;
3246   PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3250   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3251   b = (Mat_MPIAIJ*)B->data;
3252 
3253   if (!B->preallocated) {
3254     /* Explicitly create 2 MATSEQAIJ matrices. */
3255     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3256     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3257     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3258     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3259     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3260     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3261     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3262     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3263     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3264     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3265   }
3266 
3267   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3268   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3269   B->preallocated = PETSC_TRUE;
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3275 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3276 {
3277   Mat            mat;
3278   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3279   PetscErrorCode ierr;
3280 
3281   PetscFunctionBegin;
3282   *newmat = 0;
3283   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3284   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3285   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3286   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3287   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3288   a       = (Mat_MPIAIJ*)mat->data;
3289 
3290   mat->factortype   = matin->factortype;
3291   mat->assembled    = PETSC_TRUE;
3292   mat->insertmode   = NOT_SET_VALUES;
3293   mat->preallocated = PETSC_TRUE;
3294 
3295   a->size         = oldmat->size;
3296   a->rank         = oldmat->rank;
3297   a->donotstash   = oldmat->donotstash;
3298   a->roworiented  = oldmat->roworiented;
3299   a->rowindices   = 0;
3300   a->rowvalues    = 0;
3301   a->getrowactive = PETSC_FALSE;
3302 
3303   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3304   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3305 
3306   if (oldmat->colmap) {
3307 #if defined(PETSC_USE_CTABLE)
3308     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3309 #else
3310     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3311     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3312     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3313 #endif
3314   } else a->colmap = 0;
3315   if (oldmat->garray) {
3316     PetscInt len;
3317     len  = oldmat->B->cmap->n;
3318     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3319     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3320     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3321   } else a->garray = 0;
3322 
3323   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3324   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3325   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3326   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3327   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3328   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3329   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3330   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3331   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3332   *newmat = mat;
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 
3337 
3338 #undef __FUNCT__
3339 #define __FUNCT__ "MatLoad_MPIAIJ"
3340 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3341 {
3342   PetscScalar    *vals,*svals;
3343   MPI_Comm       comm;
3344   PetscErrorCode ierr;
3345   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3346   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3347   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3348   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3349   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3350   int            fd;
3351   PetscInt       bs = 1;
3352 
3353   PetscFunctionBegin;
3354   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3355   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3356   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3357   if (!rank) {
3358     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3359     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3360     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3361   }
3362 
3363   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3364   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3365   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3366 
3367   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3368 
3369   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3370   M    = header[1]; N = header[2];
3371   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3372   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3373   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3374 
3375   /* If global sizes are set, check if they are consistent with that given in the file */
3376   if (sizesset) {
3377     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3378   }
3379   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3380   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3381 
3382   /* determine ownership of all (block) rows */
3383   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3384   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3385   else m = newMat->rmap->n; /* Set by user */
3386 
3387   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3388   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3389 
3390   /* First process needs enough room for process with most rows */
3391   if (!rank) {
3392     mmax = rowners[1];
3393     for (i=2; i<=size; i++) {
3394       mmax = PetscMax(mmax, rowners[i]);
3395     }
3396   } else mmax = -1;             /* unused, but compilers complain */
3397 
3398   rowners[0] = 0;
3399   for (i=2; i<=size; i++) {
3400     rowners[i] += rowners[i-1];
3401   }
3402   rstart = rowners[rank];
3403   rend   = rowners[rank+1];
3404 
3405   /* distribute row lengths to all processors */
3406   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3407   if (!rank) {
3408     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3409     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3410     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3411     for (j=0; j<m; j++) {
3412       procsnz[0] += ourlens[j];
3413     }
3414     for (i=1; i<size; i++) {
3415       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3416       /* calculate the number of nonzeros on each processor */
3417       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3418         procsnz[i] += rowlengths[j];
3419       }
3420       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3421     }
3422     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3423   } else {
3424     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3425   }
3426 
3427   if (!rank) {
3428     /* determine max buffer needed and allocate it */
3429     maxnz = 0;
3430     for (i=0; i<size; i++) {
3431       maxnz = PetscMax(maxnz,procsnz[i]);
3432     }
3433     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3434 
3435     /* read in my part of the matrix column indices  */
3436     nz   = procsnz[0];
3437     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3438     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3439 
3440     /* read in every one elses and ship off */
3441     for (i=1; i<size; i++) {
3442       nz   = procsnz[i];
3443       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3444       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3445     }
3446     ierr = PetscFree(cols);CHKERRQ(ierr);
3447   } else {
3448     /* determine buffer space needed for message */
3449     nz = 0;
3450     for (i=0; i<m; i++) {
3451       nz += ourlens[i];
3452     }
3453     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3454 
3455     /* receive message of column indices*/
3456     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3457   }
3458 
3459   /* determine column ownership if matrix is not square */
3460   if (N != M) {
3461     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3462     else n = newMat->cmap->n;
3463     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3464     cstart = cend - n;
3465   } else {
3466     cstart = rstart;
3467     cend   = rend;
3468     n      = cend - cstart;
3469   }
3470 
3471   /* loop over local rows, determining number of off diagonal entries */
3472   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3473   jj   = 0;
3474   for (i=0; i<m; i++) {
3475     for (j=0; j<ourlens[i]; j++) {
3476       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3477       jj++;
3478     }
3479   }
3480 
3481   for (i=0; i<m; i++) {
3482     ourlens[i] -= offlens[i];
3483   }
3484   if (!sizesset) {
3485     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3486   }
3487 
3488   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3489 
3490   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3491 
3492   for (i=0; i<m; i++) {
3493     ourlens[i] += offlens[i];
3494   }
3495 
3496   if (!rank) {
3497     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3498 
3499     /* read in my part of the matrix numerical values  */
3500     nz   = procsnz[0];
3501     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3502 
3503     /* insert into matrix */
3504     jj      = rstart;
3505     smycols = mycols;
3506     svals   = vals;
3507     for (i=0; i<m; i++) {
3508       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3509       smycols += ourlens[i];
3510       svals   += ourlens[i];
3511       jj++;
3512     }
3513 
3514     /* read in other processors and ship out */
3515     for (i=1; i<size; i++) {
3516       nz   = procsnz[i];
3517       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3518       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3519     }
3520     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3521   } else {
3522     /* receive numeric values */
3523     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3524 
3525     /* receive message of values*/
3526     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3527 
3528     /* insert into matrix */
3529     jj      = rstart;
3530     smycols = mycols;
3531     svals   = vals;
3532     for (i=0; i<m; i++) {
3533       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3534       smycols += ourlens[i];
3535       svals   += ourlens[i];
3536       jj++;
3537     }
3538   }
3539   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3540   ierr = PetscFree(vals);CHKERRQ(ierr);
3541   ierr = PetscFree(mycols);CHKERRQ(ierr);
3542   ierr = PetscFree(rowners);CHKERRQ(ierr);
3543   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3544   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 #undef __FUNCT__
3549 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3550 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3551 {
3552   PetscErrorCode ierr;
3553   IS             iscol_local;
3554   PetscInt       csize;
3555 
3556   PetscFunctionBegin;
3557   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3558   if (call == MAT_REUSE_MATRIX) {
3559     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3560     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3561   } else {
3562     PetscInt cbs;
3563     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3564     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3565     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3566   }
3567   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3568   if (call == MAT_INITIAL_MATRIX) {
3569     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3570     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3571   }
3572   PetscFunctionReturn(0);
3573 }
3574 
3575 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3578 /*
3579     Not great since it makes two copies of the submatrix, first an SeqAIJ
3580   in local and then by concatenating the local matrices the end result.
3581   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3582 
3583   Note: This requires a sequential iscol with all indices.
3584 */
3585 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3586 {
3587   PetscErrorCode ierr;
3588   PetscMPIInt    rank,size;
3589   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3590   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3591   PetscBool      allcolumns, colflag;
3592   Mat            M,Mreuse;
3593   MatScalar      *vwork,*aa;
3594   MPI_Comm       comm;
3595   Mat_SeqAIJ     *aij;
3596 
3597   PetscFunctionBegin;
3598   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3601 
3602   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3603   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3604   if (colflag && ncol == mat->cmap->N) {
3605     allcolumns = PETSC_TRUE;
3606   } else {
3607     allcolumns = PETSC_FALSE;
3608   }
3609   if (call ==  MAT_REUSE_MATRIX) {
3610     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3611     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3612     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3613   } else {
3614     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3615   }
3616 
3617   /*
3618       m - number of local rows
3619       n - number of columns (same on all processors)
3620       rstart - first row in new global matrix generated
3621   */
3622   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3623   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3624   if (call == MAT_INITIAL_MATRIX) {
3625     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3626     ii  = aij->i;
3627     jj  = aij->j;
3628 
3629     /*
3630         Determine the number of non-zeros in the diagonal and off-diagonal
3631         portions of the matrix in order to do correct preallocation
3632     */
3633 
3634     /* first get start and end of "diagonal" columns */
3635     if (csize == PETSC_DECIDE) {
3636       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3637       if (mglobal == n) { /* square matrix */
3638         nlocal = m;
3639       } else {
3640         nlocal = n/size + ((n % size) > rank);
3641       }
3642     } else {
3643       nlocal = csize;
3644     }
3645     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3646     rstart = rend - nlocal;
3647     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3648 
3649     /* next, compute all the lengths */
3650     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3651     olens = dlens + m;
3652     for (i=0; i<m; i++) {
3653       jend = ii[i+1] - ii[i];
3654       olen = 0;
3655       dlen = 0;
3656       for (j=0; j<jend; j++) {
3657         if (*jj < rstart || *jj >= rend) olen++;
3658         else dlen++;
3659         jj++;
3660       }
3661       olens[i] = olen;
3662       dlens[i] = dlen;
3663     }
3664     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3665     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3666     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3667     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3668     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3669     ierr = PetscFree(dlens);CHKERRQ(ierr);
3670   } else {
3671     PetscInt ml,nl;
3672 
3673     M    = *newmat;
3674     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3675     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3676     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3677     /*
3678          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3679        rather than the slower MatSetValues().
3680     */
3681     M->was_assembled = PETSC_TRUE;
3682     M->assembled     = PETSC_FALSE;
3683   }
3684   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3685   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3686   ii   = aij->i;
3687   jj   = aij->j;
3688   aa   = aij->a;
3689   for (i=0; i<m; i++) {
3690     row   = rstart + i;
3691     nz    = ii[i+1] - ii[i];
3692     cwork = jj;     jj += nz;
3693     vwork = aa;     aa += nz;
3694     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3695   }
3696 
3697   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3698   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3699   *newmat = M;
3700 
3701   /* save submatrix used in processor for next request */
3702   if (call ==  MAT_INITIAL_MATRIX) {
3703     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3704     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3705   }
3706   PetscFunctionReturn(0);
3707 }
3708 
3709 #undef __FUNCT__
3710 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3711 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3712 {
3713   PetscInt       m,cstart, cend,j,nnz,i,d;
3714   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3715   const PetscInt *JJ;
3716   PetscScalar    *values;
3717   PetscErrorCode ierr;
3718 
3719   PetscFunctionBegin;
3720   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3721 
3722   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3723   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3724   m      = B->rmap->n;
3725   cstart = B->cmap->rstart;
3726   cend   = B->cmap->rend;
3727   rstart = B->rmap->rstart;
3728 
3729   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3730 
3731 #if defined(PETSC_USE_DEBUGGING)
3732   for (i=0; i<m; i++) {
3733     nnz = Ii[i+1]- Ii[i];
3734     JJ  = J + Ii[i];
3735     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3736     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3737     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3738   }
3739 #endif
3740 
3741   for (i=0; i<m; i++) {
3742     nnz     = Ii[i+1]- Ii[i];
3743     JJ      = J + Ii[i];
3744     nnz_max = PetscMax(nnz_max,nnz);
3745     d       = 0;
3746     for (j=0; j<nnz; j++) {
3747       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3748     }
3749     d_nnz[i] = d;
3750     o_nnz[i] = nnz - d;
3751   }
3752   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3753   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3754 
3755   if (v) values = (PetscScalar*)v;
3756   else {
3757     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3758   }
3759 
3760   for (i=0; i<m; i++) {
3761     ii   = i + rstart;
3762     nnz  = Ii[i+1]- Ii[i];
3763     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3764   }
3765   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3766   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3767 
3768   if (!v) {
3769     ierr = PetscFree(values);CHKERRQ(ierr);
3770   }
3771   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3777 /*@
3778    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3779    (the default parallel PETSc format).
3780 
3781    Collective on MPI_Comm
3782 
3783    Input Parameters:
3784 +  B - the matrix
3785 .  i - the indices into j for the start of each local row (starts with zero)
3786 .  j - the column indices for each local row (starts with zero)
3787 -  v - optional values in the matrix
3788 
3789    Level: developer
3790 
3791    Notes:
3792        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3793      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3794      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3795 
3796        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3797 
3798        The format which is used for the sparse matrix input, is equivalent to a
3799     row-major ordering.. i.e for the following matrix, the input data expected is
3800     as shown:
3801 
3802         1 0 0
3803         2 0 3     P0
3804        -------
3805         4 5 6     P1
3806 
3807      Process0 [P0]: rows_owned=[0,1]
3808         i =  {0,1,3}  [size = nrow+1  = 2+1]
3809         j =  {0,0,2}  [size = local nz = 3]
3810         v =  {1,2,3}  [size = local nz = 3]
3811 
3812      Process1 [P1]: rows_owned=[2]
3813         i =  {0,3}    [size = nrow+1  = 1+1]
3814         j =  {0,1,2}  [size = local nz = 3]
3815         v =  {4,5,6}  [size = local nz = 3]
3816 
3817 .keywords: matrix, aij, compressed row, sparse, parallel
3818 
3819 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3820           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3821 @*/
3822 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3823 {
3824   PetscErrorCode ierr;
3825 
3826   PetscFunctionBegin;
3827   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3828   PetscFunctionReturn(0);
3829 }
3830 
3831 #undef __FUNCT__
3832 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3833 /*@C
3834    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3835    (the default parallel PETSc format).  For good matrix assembly performance
3836    the user should preallocate the matrix storage by setting the parameters
3837    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3838    performance can be increased by more than a factor of 50.
3839 
3840    Collective on MPI_Comm
3841 
3842    Input Parameters:
3843 +  B - the matrix
3844 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3845            (same value is used for all local rows)
3846 .  d_nnz - array containing the number of nonzeros in the various rows of the
3847            DIAGONAL portion of the local submatrix (possibly different for each row)
3848            or NULL, if d_nz is used to specify the nonzero structure.
3849            The size of this array is equal to the number of local rows, i.e 'm'.
3850            For matrices that will be factored, you must leave room for (and set)
3851            the diagonal entry even if it is zero.
3852 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3853            submatrix (same value is used for all local rows).
3854 -  o_nnz - array containing the number of nonzeros in the various rows of the
3855            OFF-DIAGONAL portion of the local submatrix (possibly different for
3856            each row) or NULL, if o_nz is used to specify the nonzero
3857            structure. The size of this array is equal to the number
3858            of local rows, i.e 'm'.
3859 
3860    If the *_nnz parameter is given then the *_nz parameter is ignored
3861 
3862    The AIJ format (also called the Yale sparse matrix format or
3863    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3864    storage.  The stored row and column indices begin with zero.
3865    See Users-Manual: ch_mat for details.
3866 
3867    The parallel matrix is partitioned such that the first m0 rows belong to
3868    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3869    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3870 
3871    The DIAGONAL portion of the local submatrix of a processor can be defined
3872    as the submatrix which is obtained by extracting the part corresponding to
3873    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3874    first row that belongs to the processor, r2 is the last row belonging to
3875    this processor, and c1-c2 is the range of indices of the local part of a
3876    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3877    common case of a square matrix, the row and column ranges are the same and
3878    the DIAGONAL part is also square. The remaining portion of the local
3879    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3880 
3881    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3882 
3883    You can call MatGetInfo() to get information on how effective the preallocation was;
3884    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3885    You can also run with the option -info and look for messages with the string
3886    malloc in them to see if additional memory allocation was needed.
3887 
3888    Example usage:
3889 
3890    Consider the following 8x8 matrix with 34 non-zero values, that is
3891    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3892    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3893    as follows:
3894 
3895 .vb
3896             1  2  0  |  0  3  0  |  0  4
3897     Proc0   0  5  6  |  7  0  0  |  8  0
3898             9  0 10  | 11  0  0  | 12  0
3899     -------------------------------------
3900            13  0 14  | 15 16 17  |  0  0
3901     Proc1   0 18  0  | 19 20 21  |  0  0
3902             0  0  0  | 22 23  0  | 24  0
3903     -------------------------------------
3904     Proc2  25 26 27  |  0  0 28  | 29  0
3905            30  0  0  | 31 32 33  |  0 34
3906 .ve
3907 
3908    This can be represented as a collection of submatrices as:
3909 
3910 .vb
3911       A B C
3912       D E F
3913       G H I
3914 .ve
3915 
3916    Where the submatrices A,B,C are owned by proc0, D,E,F are
3917    owned by proc1, G,H,I are owned by proc2.
3918 
3919    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3920    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3921    The 'M','N' parameters are 8,8, and have the same values on all procs.
3922 
3923    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3924    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3925    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3926    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3927    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3928    matrix, and [DF] as another SeqAIJ matrix.
3929 
3930    When d_nz, o_nz parameters are specified, d_nz storage elements are
3931    allocated for every row of the local diagonal submatrix, and o_nz
3932    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3933    One way to choose d_nz and o_nz is to use the max nonzeros per local
3934    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3935    In this case, the values of d_nz,o_nz are:
3936 .vb
3937      proc0 : dnz = 2, o_nz = 2
3938      proc1 : dnz = 3, o_nz = 2
3939      proc2 : dnz = 1, o_nz = 4
3940 .ve
3941    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3942    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3943    for proc2. i.e we are using 12+15+10=37 storage locations to store
3944    34 values.
3945 
3946    When d_nnz, o_nnz parameters are specified, the storage is specified
3947    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3948    In the above case the values for d_nnz,o_nnz are:
3949 .vb
3950      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3951      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3952      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3953 .ve
3954    Here the space allocated is sum of all the above values i.e 34, and
3955    hence pre-allocation is perfect.
3956 
3957    Level: intermediate
3958 
3959 .keywords: matrix, aij, compressed row, sparse, parallel
3960 
3961 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3962           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3963 @*/
3964 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3965 {
3966   PetscErrorCode ierr;
3967 
3968   PetscFunctionBegin;
3969   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3970   PetscValidType(B,1);
3971   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3972   PetscFunctionReturn(0);
3973 }
3974 
3975 #undef __FUNCT__
3976 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3977 /*@
3978      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3979          CSR format the local rows.
3980 
3981    Collective on MPI_Comm
3982 
3983    Input Parameters:
3984 +  comm - MPI communicator
3985 .  m - number of local rows (Cannot be PETSC_DECIDE)
3986 .  n - This value should be the same as the local size used in creating the
3987        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3988        calculated if N is given) For square matrices n is almost always m.
3989 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3990 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3991 .   i - row indices
3992 .   j - column indices
3993 -   a - matrix values
3994 
3995    Output Parameter:
3996 .   mat - the matrix
3997 
3998    Level: intermediate
3999 
4000    Notes:
4001        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4002      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4003      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4004 
4005        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4006 
4007        The format which is used for the sparse matrix input, is equivalent to a
4008     row-major ordering.. i.e for the following matrix, the input data expected is
4009     as shown:
4010 
4011         1 0 0
4012         2 0 3     P0
4013        -------
4014         4 5 6     P1
4015 
4016      Process0 [P0]: rows_owned=[0,1]
4017         i =  {0,1,3}  [size = nrow+1  = 2+1]
4018         j =  {0,0,2}  [size = local nz = 3]
4019         a =  {1,2,3}  [size = local nz = 3]
4020 
4021      Process1 [P1]: rows_owned=[2]
4022         i =  {0,3}    [size = nrow+1  = 1+1]
4023         j =  {0,1,2}  [size = local nz = 3]
4024         a =  {4,5,6}  [size = local nz = 3]
4025 
4026 .keywords: matrix, aij, compressed row, sparse, parallel
4027 
4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4029           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4030 @*/
4031 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4032 {
4033   PetscErrorCode ierr;
4034 
4035   PetscFunctionBegin;
4036   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4037   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4038   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4039   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4040   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4041   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4042   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4043   PetscFunctionReturn(0);
4044 }
4045 
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatCreateAIJ"
4048 /*@C
4049    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4050    (the default parallel PETSc format).  For good matrix assembly performance
4051    the user should preallocate the matrix storage by setting the parameters
4052    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4053    performance can be increased by more than a factor of 50.
4054 
4055    Collective on MPI_Comm
4056 
4057    Input Parameters:
4058 +  comm - MPI communicator
4059 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4060            This value should be the same as the local size used in creating the
4061            y vector for the matrix-vector product y = Ax.
4062 .  n - This value should be the same as the local size used in creating the
4063        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4064        calculated if N is given) For square matrices n is almost always m.
4065 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4066 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4067 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4068            (same value is used for all local rows)
4069 .  d_nnz - array containing the number of nonzeros in the various rows of the
4070            DIAGONAL portion of the local submatrix (possibly different for each row)
4071            or NULL, if d_nz is used to specify the nonzero structure.
4072            The size of this array is equal to the number of local rows, i.e 'm'.
4073 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4074            submatrix (same value is used for all local rows).
4075 -  o_nnz - array containing the number of nonzeros in the various rows of the
4076            OFF-DIAGONAL portion of the local submatrix (possibly different for
4077            each row) or NULL, if o_nz is used to specify the nonzero
4078            structure. The size of this array is equal to the number
4079            of local rows, i.e 'm'.
4080 
4081    Output Parameter:
4082 .  A - the matrix
4083 
4084    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4085    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4086    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4087 
4088    Notes:
4089    If the *_nnz parameter is given then the *_nz parameter is ignored
4090 
4091    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4092    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4093    storage requirements for this matrix.
4094 
4095    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4096    processor then it must be used on all processors that share the object for
4097    that argument.
4098 
4099    The user MUST specify either the local or global matrix dimensions
4100    (possibly both).
4101 
4102    The parallel matrix is partitioned across processors such that the
4103    first m0 rows belong to process 0, the next m1 rows belong to
4104    process 1, the next m2 rows belong to process 2 etc.. where
4105    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4106    values corresponding to [m x N] submatrix.
4107 
4108    The columns are logically partitioned with the n0 columns belonging
4109    to 0th partition, the next n1 columns belonging to the next
4110    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4111 
4112    The DIAGONAL portion of the local submatrix on any given processor
4113    is the submatrix corresponding to the rows and columns m,n
4114    corresponding to the given processor. i.e diagonal matrix on
4115    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4116    etc. The remaining portion of the local submatrix [m x (N-n)]
4117    constitute the OFF-DIAGONAL portion. The example below better
4118    illustrates this concept.
4119 
4120    For a square global matrix we define each processor's diagonal portion
4121    to be its local rows and the corresponding columns (a square submatrix);
4122    each processor's off-diagonal portion encompasses the remainder of the
4123    local matrix (a rectangular submatrix).
4124 
4125    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4126 
4127    When calling this routine with a single process communicator, a matrix of
4128    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4129    type of communicator, use the construction mechanism:
4130      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4131 
4132    By default, this format uses inodes (identical nodes) when possible.
4133    We search for consecutive rows with the same nonzero structure, thereby
4134    reusing matrix information to achieve increased efficiency.
4135 
4136    Options Database Keys:
4137 +  -mat_no_inode  - Do not use inodes
4138 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4139 -  -mat_aij_oneindex - Internally use indexing starting at 1
4140         rather than 0.  Note that when calling MatSetValues(),
4141         the user still MUST index entries starting at 0!
4142 
4143 
4144    Example usage:
4145 
4146    Consider the following 8x8 matrix with 34 non-zero values, that is
4147    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4148    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4149    as follows:
4150 
4151 .vb
4152             1  2  0  |  0  3  0  |  0  4
4153     Proc0   0  5  6  |  7  0  0  |  8  0
4154             9  0 10  | 11  0  0  | 12  0
4155     -------------------------------------
4156            13  0 14  | 15 16 17  |  0  0
4157     Proc1   0 18  0  | 19 20 21  |  0  0
4158             0  0  0  | 22 23  0  | 24  0
4159     -------------------------------------
4160     Proc2  25 26 27  |  0  0 28  | 29  0
4161            30  0  0  | 31 32 33  |  0 34
4162 .ve
4163 
4164    This can be represented as a collection of submatrices as:
4165 
4166 .vb
4167       A B C
4168       D E F
4169       G H I
4170 .ve
4171 
4172    Where the submatrices A,B,C are owned by proc0, D,E,F are
4173    owned by proc1, G,H,I are owned by proc2.
4174 
4175    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4176    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4177    The 'M','N' parameters are 8,8, and have the same values on all procs.
4178 
4179    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4180    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4181    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4182    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4183    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4184    matrix, and [DF] as another SeqAIJ matrix.
4185 
4186    When d_nz, o_nz parameters are specified, d_nz storage elements are
4187    allocated for every row of the local diagonal submatrix, and o_nz
4188    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4189    One way to choose d_nz and o_nz is to use the max nonzeros per local
4190    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4191    In this case, the values of d_nz,o_nz are:
4192 .vb
4193      proc0 : dnz = 2, o_nz = 2
4194      proc1 : dnz = 3, o_nz = 2
4195      proc2 : dnz = 1, o_nz = 4
4196 .ve
4197    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4198    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4199    for proc2. i.e we are using 12+15+10=37 storage locations to store
4200    34 values.
4201 
4202    When d_nnz, o_nnz parameters are specified, the storage is specified
4203    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4204    In the above case the values for d_nnz,o_nnz are:
4205 .vb
4206      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4207      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4208      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4209 .ve
4210    Here the space allocated is sum of all the above values i.e 34, and
4211    hence pre-allocation is perfect.
4212 
4213    Level: intermediate
4214 
4215 .keywords: matrix, aij, compressed row, sparse, parallel
4216 
4217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4218           MPIAIJ, MatCreateMPIAIJWithArrays()
4219 @*/
4220 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4221 {
4222   PetscErrorCode ierr;
4223   PetscMPIInt    size;
4224 
4225   PetscFunctionBegin;
4226   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4227   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4228   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4229   if (size > 1) {
4230     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4231     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4232   } else {
4233     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4234     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4235   }
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4241 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4242 {
4243   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4244 
4245   PetscFunctionBegin;
4246   if (Ad)     *Ad     = a->A;
4247   if (Ao)     *Ao     = a->B;
4248   if (colmap) *colmap = a->garray;
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 #undef __FUNCT__
4253 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4254 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4255 {
4256   PetscErrorCode ierr;
4257   PetscInt       i;
4258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4259 
4260   PetscFunctionBegin;
4261   if (coloring->ctype == IS_COLORING_GLOBAL) {
4262     ISColoringValue *allcolors,*colors;
4263     ISColoring      ocoloring;
4264 
4265     /* set coloring for diagonal portion */
4266     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4267 
4268     /* set coloring for off-diagonal portion */
4269     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4270     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4271     for (i=0; i<a->B->cmap->n; i++) {
4272       colors[i] = allcolors[a->garray[i]];
4273     }
4274     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4275     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4276     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4277     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4278   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4279     ISColoringValue *colors;
4280     PetscInt        *larray;
4281     ISColoring      ocoloring;
4282 
4283     /* set coloring for diagonal portion */
4284     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4285     for (i=0; i<a->A->cmap->n; i++) {
4286       larray[i] = i + A->cmap->rstart;
4287     }
4288     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4289     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4290     for (i=0; i<a->A->cmap->n; i++) {
4291       colors[i] = coloring->colors[larray[i]];
4292     }
4293     ierr = PetscFree(larray);CHKERRQ(ierr);
4294     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4295     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4296     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4297 
4298     /* set coloring for off-diagonal portion */
4299     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4300     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4301     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4302     for (i=0; i<a->B->cmap->n; i++) {
4303       colors[i] = coloring->colors[larray[i]];
4304     }
4305     ierr = PetscFree(larray);CHKERRQ(ierr);
4306     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4307     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4308     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4309   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 #undef __FUNCT__
4314 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4315 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4316 {
4317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4318   PetscErrorCode ierr;
4319 
4320   PetscFunctionBegin;
4321   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4322   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4323   PetscFunctionReturn(0);
4324 }
4325 
4326 #undef __FUNCT__
4327 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4328 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4329 {
4330   PetscErrorCode ierr;
4331   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4332   PetscInt       *indx;
4333 
4334   PetscFunctionBegin;
4335   /* This routine will ONLY return MPIAIJ type matrix */
4336   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4337   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4338   if (n == PETSC_DECIDE) {
4339     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4340   }
4341   /* Check sum(n) = N */
4342   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4343   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4344 
4345   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4346   rstart -= m;
4347 
4348   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4349   for (i=0; i<m; i++) {
4350     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4351     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4352     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4353   }
4354 
4355   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4356   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4357   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4358   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4359   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4360   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4361   PetscFunctionReturn(0);
4362 }
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4366 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4367 {
4368   PetscErrorCode ierr;
4369   PetscInt       m,N,i,rstart,nnz,Ii;
4370   PetscInt       *indx;
4371   PetscScalar    *values;
4372 
4373   PetscFunctionBegin;
4374   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4375   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4376   for (i=0; i<m; i++) {
4377     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4378     Ii   = i + rstart;
4379     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4380     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4381   }
4382   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 #undef __FUNCT__
4388 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4389 /*@
4390       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4391                  matrices from each processor
4392 
4393     Collective on MPI_Comm
4394 
4395    Input Parameters:
4396 +    comm - the communicators the parallel matrix will live on
4397 .    inmat - the input sequential matrices
4398 .    n - number of local columns (or PETSC_DECIDE)
4399 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4400 
4401    Output Parameter:
4402 .    outmat - the parallel matrix generated
4403 
4404     Level: advanced
4405 
4406    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4407 
4408 @*/
4409 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4410 {
4411   PetscErrorCode ierr;
4412   PetscMPIInt    size;
4413 
4414   PetscFunctionBegin;
4415   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4416   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4417   if (size == 1) {
4418     if (scall == MAT_INITIAL_MATRIX) {
4419       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4420     } else {
4421       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4422     }
4423   } else {
4424     if (scall == MAT_INITIAL_MATRIX) {
4425       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4426     }
4427     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4428   }
4429   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4430   PetscFunctionReturn(0);
4431 }
4432 
4433 #undef __FUNCT__
4434 #define __FUNCT__ "MatFileSplit"
4435 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4436 {
4437   PetscErrorCode    ierr;
4438   PetscMPIInt       rank;
4439   PetscInt          m,N,i,rstart,nnz;
4440   size_t            len;
4441   const PetscInt    *indx;
4442   PetscViewer       out;
4443   char              *name;
4444   Mat               B;
4445   const PetscScalar *values;
4446 
4447   PetscFunctionBegin;
4448   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4449   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4450   /* Should this be the type of the diagonal block of A? */
4451   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4452   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4453   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4454   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4455   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4456   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4457   for (i=0; i<m; i++) {
4458     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4459     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4460     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4461   }
4462   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4463   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4464 
4465   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4466   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4467   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4468   sprintf(name,"%s.%d",outfile,rank);
4469   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4470   ierr = PetscFree(name);CHKERRQ(ierr);
4471   ierr = MatView(B,out);CHKERRQ(ierr);
4472   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4473   ierr = MatDestroy(&B);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4478 #undef __FUNCT__
4479 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4480 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4481 {
4482   PetscErrorCode      ierr;
4483   Mat_Merge_SeqsToMPI *merge;
4484   PetscContainer      container;
4485 
4486   PetscFunctionBegin;
4487   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4488   if (container) {
4489     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4502     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4503     ierr = PetscFree(merge);CHKERRQ(ierr);
4504     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4505   }
4506   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4507   PetscFunctionReturn(0);
4508 }
4509 
4510 #include <../src/mat/utils/freespace.h>
4511 #include <petscbt.h>
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4515 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4516 {
4517   PetscErrorCode      ierr;
4518   MPI_Comm            comm;
4519   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4520   PetscMPIInt         size,rank,taga,*len_s;
4521   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4522   PetscInt            proc,m;
4523   PetscInt            **buf_ri,**buf_rj;
4524   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4525   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4526   MPI_Request         *s_waits,*r_waits;
4527   MPI_Status          *status;
4528   MatScalar           *aa=a->a;
4529   MatScalar           **abuf_r,*ba_i;
4530   Mat_Merge_SeqsToMPI *merge;
4531   PetscContainer      container;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4535   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4536 
4537   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4538   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4539 
4540   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4541   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4542 
4543   bi     = merge->bi;
4544   bj     = merge->bj;
4545   buf_ri = merge->buf_ri;
4546   buf_rj = merge->buf_rj;
4547 
4548   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4549   owners = merge->rowmap->range;
4550   len_s  = merge->len_s;
4551 
4552   /* send and recv matrix values */
4553   /*-----------------------------*/
4554   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4555   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4556 
4557   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4558   for (proc=0,k=0; proc<size; proc++) {
4559     if (!len_s[proc]) continue;
4560     i    = owners[proc];
4561     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4562     k++;
4563   }
4564 
4565   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4566   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4567   ierr = PetscFree(status);CHKERRQ(ierr);
4568 
4569   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4570   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4571 
4572   /* insert mat values of mpimat */
4573   /*----------------------------*/
4574   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4575   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4576 
4577   for (k=0; k<merge->nrecv; k++) {
4578     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4579     nrows       = *(buf_ri_k[k]);
4580     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4581     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4582   }
4583 
4584   /* set values of ba */
4585   m = merge->rowmap->n;
4586   for (i=0; i<m; i++) {
4587     arow = owners[rank] + i;
4588     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4589     bnzi = bi[i+1] - bi[i];
4590     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4591 
4592     /* add local non-zero vals of this proc's seqmat into ba */
4593     anzi   = ai[arow+1] - ai[arow];
4594     aj     = a->j + ai[arow];
4595     aa     = a->a + ai[arow];
4596     nextaj = 0;
4597     for (j=0; nextaj<anzi; j++) {
4598       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4599         ba_i[j] += aa[nextaj++];
4600       }
4601     }
4602 
4603     /* add received vals into ba */
4604     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4605       /* i-th row */
4606       if (i == *nextrow[k]) {
4607         anzi   = *(nextai[k]+1) - *nextai[k];
4608         aj     = buf_rj[k] + *(nextai[k]);
4609         aa     = abuf_r[k] + *(nextai[k]);
4610         nextaj = 0;
4611         for (j=0; nextaj<anzi; j++) {
4612           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4613             ba_i[j] += aa[nextaj++];
4614           }
4615         }
4616         nextrow[k]++; nextai[k]++;
4617       }
4618     }
4619     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4620   }
4621   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4622   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4623 
4624   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4625   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4626   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4627   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4628   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4629   PetscFunctionReturn(0);
4630 }
4631 
4632 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4633 
4634 #undef __FUNCT__
4635 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4636 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4637 {
4638   PetscErrorCode      ierr;
4639   Mat                 B_mpi;
4640   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4641   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4642   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4643   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4644   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4645   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4646   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4647   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4648   MPI_Status          *status;
4649   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4650   PetscBT             lnkbt;
4651   Mat_Merge_SeqsToMPI *merge;
4652   PetscContainer      container;
4653 
4654   PetscFunctionBegin;
4655   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4656 
4657   /* make sure it is a PETSc comm */
4658   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4659   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4660   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4661 
4662   ierr = PetscNew(&merge);CHKERRQ(ierr);
4663   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4664 
4665   /* determine row ownership */
4666   /*---------------------------------------------------------*/
4667   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4670   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4671   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4672   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4673   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4674 
4675   m      = merge->rowmap->n;
4676   owners = merge->rowmap->range;
4677 
4678   /* determine the number of messages to send, their lengths */
4679   /*---------------------------------------------------------*/
4680   len_s = merge->len_s;
4681 
4682   len          = 0; /* length of buf_si[] */
4683   merge->nsend = 0;
4684   for (proc=0; proc<size; proc++) {
4685     len_si[proc] = 0;
4686     if (proc == rank) {
4687       len_s[proc] = 0;
4688     } else {
4689       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4690       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4691     }
4692     if (len_s[proc]) {
4693       merge->nsend++;
4694       nrows = 0;
4695       for (i=owners[proc]; i<owners[proc+1]; i++) {
4696         if (ai[i+1] > ai[i]) nrows++;
4697       }
4698       len_si[proc] = 2*(nrows+1);
4699       len         += len_si[proc];
4700     }
4701   }
4702 
4703   /* determine the number and length of messages to receive for ij-structure */
4704   /*-------------------------------------------------------------------------*/
4705   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4706   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4707 
4708   /* post the Irecv of j-structure */
4709   /*-------------------------------*/
4710   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4711   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4712 
4713   /* post the Isend of j-structure */
4714   /*--------------------------------*/
4715   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4716 
4717   for (proc=0, k=0; proc<size; proc++) {
4718     if (!len_s[proc]) continue;
4719     i    = owners[proc];
4720     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4721     k++;
4722   }
4723 
4724   /* receives and sends of j-structure are complete */
4725   /*------------------------------------------------*/
4726   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4727   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4728 
4729   /* send and recv i-structure */
4730   /*---------------------------*/
4731   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4732   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4733 
4734   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4735   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4736   for (proc=0,k=0; proc<size; proc++) {
4737     if (!len_s[proc]) continue;
4738     /* form outgoing message for i-structure:
4739          buf_si[0]:                 nrows to be sent
4740                [1:nrows]:           row index (global)
4741                [nrows+1:2*nrows+1]: i-structure index
4742     */
4743     /*-------------------------------------------*/
4744     nrows       = len_si[proc]/2 - 1;
4745     buf_si_i    = buf_si + nrows+1;
4746     buf_si[0]   = nrows;
4747     buf_si_i[0] = 0;
4748     nrows       = 0;
4749     for (i=owners[proc]; i<owners[proc+1]; i++) {
4750       anzi = ai[i+1] - ai[i];
4751       if (anzi) {
4752         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4753         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4754         nrows++;
4755       }
4756     }
4757     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4758     k++;
4759     buf_si += len_si[proc];
4760   }
4761 
4762   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4763   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4764 
4765   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4766   for (i=0; i<merge->nrecv; i++) {
4767     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4768   }
4769 
4770   ierr = PetscFree(len_si);CHKERRQ(ierr);
4771   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4772   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4773   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4774   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4775   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4776   ierr = PetscFree(status);CHKERRQ(ierr);
4777 
4778   /* compute a local seq matrix in each processor */
4779   /*----------------------------------------------*/
4780   /* allocate bi array and free space for accumulating nonzero column info */
4781   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4782   bi[0] = 0;
4783 
4784   /* create and initialize a linked list */
4785   nlnk = N+1;
4786   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4787 
4788   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4789   len  = ai[owners[rank+1]] - ai[owners[rank]];
4790   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4791 
4792   current_space = free_space;
4793 
4794   /* determine symbolic info for each local row */
4795   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4796 
4797   for (k=0; k<merge->nrecv; k++) {
4798     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4799     nrows       = *buf_ri_k[k];
4800     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4801     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4802   }
4803 
4804   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4805   len  = 0;
4806   for (i=0; i<m; i++) {
4807     bnzi = 0;
4808     /* add local non-zero cols of this proc's seqmat into lnk */
4809     arow  = owners[rank] + i;
4810     anzi  = ai[arow+1] - ai[arow];
4811     aj    = a->j + ai[arow];
4812     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4813     bnzi += nlnk;
4814     /* add received col data into lnk */
4815     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4816       if (i == *nextrow[k]) { /* i-th row */
4817         anzi  = *(nextai[k]+1) - *nextai[k];
4818         aj    = buf_rj[k] + *nextai[k];
4819         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4820         bnzi += nlnk;
4821         nextrow[k]++; nextai[k]++;
4822       }
4823     }
4824     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4825 
4826     /* if free space is not available, make more free space */
4827     if (current_space->local_remaining<bnzi) {
4828       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4829       nspacedouble++;
4830     }
4831     /* copy data into free space, then initialize lnk */
4832     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4833     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4834 
4835     current_space->array           += bnzi;
4836     current_space->local_used      += bnzi;
4837     current_space->local_remaining -= bnzi;
4838 
4839     bi[i+1] = bi[i] + bnzi;
4840   }
4841 
4842   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4843 
4844   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4845   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4846   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4847 
4848   /* create symbolic parallel matrix B_mpi */
4849   /*---------------------------------------*/
4850   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4851   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4852   if (n==PETSC_DECIDE) {
4853     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4854   } else {
4855     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4856   }
4857   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4858   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4859   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4860   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4861   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4862 
4863   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4864   B_mpi->assembled    = PETSC_FALSE;
4865   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4866   merge->bi           = bi;
4867   merge->bj           = bj;
4868   merge->buf_ri       = buf_ri;
4869   merge->buf_rj       = buf_rj;
4870   merge->coi          = NULL;
4871   merge->coj          = NULL;
4872   merge->owners_co    = NULL;
4873 
4874   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4875 
4876   /* attach the supporting struct to B_mpi for reuse */
4877   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4878   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4879   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4880   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4881   *mpimat = B_mpi;
4882 
4883   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4884   PetscFunctionReturn(0);
4885 }
4886 
4887 #undef __FUNCT__
4888 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4889 /*@C
4890       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4891                  matrices from each processor
4892 
4893     Collective on MPI_Comm
4894 
4895    Input Parameters:
4896 +    comm - the communicators the parallel matrix will live on
4897 .    seqmat - the input sequential matrices
4898 .    m - number of local rows (or PETSC_DECIDE)
4899 .    n - number of local columns (or PETSC_DECIDE)
4900 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4901 
4902    Output Parameter:
4903 .    mpimat - the parallel matrix generated
4904 
4905     Level: advanced
4906 
4907    Notes:
4908      The dimensions of the sequential matrix in each processor MUST be the same.
4909      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4910      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4911 @*/
4912 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4913 {
4914   PetscErrorCode ierr;
4915   PetscMPIInt    size;
4916 
4917   PetscFunctionBegin;
4918   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4919   if (size == 1) {
4920     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921     if (scall == MAT_INITIAL_MATRIX) {
4922       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4923     } else {
4924       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4925     }
4926     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4927     PetscFunctionReturn(0);
4928   }
4929   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930   if (scall == MAT_INITIAL_MATRIX) {
4931     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4932   }
4933   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4934   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935   PetscFunctionReturn(0);
4936 }
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4940 /*@
4941      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4942           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4943           with MatGetSize()
4944 
4945     Not Collective
4946 
4947    Input Parameters:
4948 +    A - the matrix
4949 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4950 
4951    Output Parameter:
4952 .    A_loc - the local sequential matrix generated
4953 
4954     Level: developer
4955 
4956 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4957 
4958 @*/
4959 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4960 {
4961   PetscErrorCode ierr;
4962   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4963   Mat_SeqAIJ     *mat,*a,*b;
4964   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4965   MatScalar      *aa,*ba,*cam;
4966   PetscScalar    *ca;
4967   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4968   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4969   PetscBool      match;
4970 
4971   PetscFunctionBegin;
4972   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4973   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4974   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4975   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4976   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4977   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4978   aa = a->a; ba = b->a;
4979   if (scall == MAT_INITIAL_MATRIX) {
4980     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4981     ci[0] = 0;
4982     for (i=0; i<am; i++) {
4983       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4984     }
4985     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4986     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4987     k    = 0;
4988     for (i=0; i<am; i++) {
4989       ncols_o = bi[i+1] - bi[i];
4990       ncols_d = ai[i+1] - ai[i];
4991       /* off-diagonal portion of A */
4992       for (jo=0; jo<ncols_o; jo++) {
4993         col = cmap[*bj];
4994         if (col >= cstart) break;
4995         cj[k]   = col; bj++;
4996         ca[k++] = *ba++;
4997       }
4998       /* diagonal portion of A */
4999       for (j=0; j<ncols_d; j++) {
5000         cj[k]   = cstart + *aj++;
5001         ca[k++] = *aa++;
5002       }
5003       /* off-diagonal portion of A */
5004       for (j=jo; j<ncols_o; j++) {
5005         cj[k]   = cmap[*bj++];
5006         ca[k++] = *ba++;
5007       }
5008     }
5009     /* put together the new matrix */
5010     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5011     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5012     /* Since these are PETSc arrays, change flags to free them as necessary. */
5013     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5014     mat->free_a  = PETSC_TRUE;
5015     mat->free_ij = PETSC_TRUE;
5016     mat->nonew   = 0;
5017   } else if (scall == MAT_REUSE_MATRIX) {
5018     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5019     ci = mat->i; cj = mat->j; cam = mat->a;
5020     for (i=0; i<am; i++) {
5021       /* off-diagonal portion of A */
5022       ncols_o = bi[i+1] - bi[i];
5023       for (jo=0; jo<ncols_o; jo++) {
5024         col = cmap[*bj];
5025         if (col >= cstart) break;
5026         *cam++ = *ba++; bj++;
5027       }
5028       /* diagonal portion of A */
5029       ncols_d = ai[i+1] - ai[i];
5030       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5031       /* off-diagonal portion of A */
5032       for (j=jo; j<ncols_o; j++) {
5033         *cam++ = *ba++; bj++;
5034       }
5035     }
5036   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5037   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5038   PetscFunctionReturn(0);
5039 }
5040 
5041 #undef __FUNCT__
5042 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5043 /*@C
5044      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5045 
5046     Not Collective
5047 
5048    Input Parameters:
5049 +    A - the matrix
5050 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5051 -    row, col - index sets of rows and columns to extract (or NULL)
5052 
5053    Output Parameter:
5054 .    A_loc - the local sequential matrix generated
5055 
5056     Level: developer
5057 
5058 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5059 
5060 @*/
5061 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5062 {
5063   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5064   PetscErrorCode ierr;
5065   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5066   IS             isrowa,iscola;
5067   Mat            *aloc;
5068   PetscBool      match;
5069 
5070   PetscFunctionBegin;
5071   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5072   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5073   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5074   if (!row) {
5075     start = A->rmap->rstart; end = A->rmap->rend;
5076     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5077   } else {
5078     isrowa = *row;
5079   }
5080   if (!col) {
5081     start = A->cmap->rstart;
5082     cmap  = a->garray;
5083     nzA   = a->A->cmap->n;
5084     nzB   = a->B->cmap->n;
5085     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5086     ncols = 0;
5087     for (i=0; i<nzB; i++) {
5088       if (cmap[i] < start) idx[ncols++] = cmap[i];
5089       else break;
5090     }
5091     imark = i;
5092     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5093     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5094     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5095   } else {
5096     iscola = *col;
5097   }
5098   if (scall != MAT_INITIAL_MATRIX) {
5099     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5100     aloc[0] = *A_loc;
5101   }
5102   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5103   *A_loc = aloc[0];
5104   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5105   if (!row) {
5106     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5107   }
5108   if (!col) {
5109     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5110   }
5111   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5112   PetscFunctionReturn(0);
5113 }
5114 
5115 #undef __FUNCT__
5116 #define __FUNCT__ "MatGetBrowsOfAcols"
5117 /*@C
5118     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A
5119 
5120     Collective on Mat
5121 
5122    Input Parameters:
5123 +    A,B - the matrices in mpiaij format
5124 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5125 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5126 
5127    Output Parameter:
5128 +    rowb, colb - index sets of rows and columns of B to extract
5129 -    B_seq - the sequential matrix generated
5130 
5131     Level: developer
5132 
5133 @*/
5134 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5135 {
5136   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5137   PetscErrorCode ierr;
5138   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5139   IS             isrowb,iscolb;
5140   Mat            *bseq=NULL;
5141 
5142   PetscFunctionBegin;
5143   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5144     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5145   }
5146   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5147 
5148   if (scall == MAT_INITIAL_MATRIX) {
5149     start = A->cmap->rstart;
5150     cmap  = a->garray;
5151     nzA   = a->A->cmap->n;
5152     nzB   = a->B->cmap->n;
5153     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5154     ncols = 0;
5155     for (i=0; i<nzB; i++) {  /* row < local row index */
5156       if (cmap[i] < start) idx[ncols++] = cmap[i];
5157       else break;
5158     }
5159     imark = i;
5160     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5161     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5162     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5163     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5164   } else {
5165     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5166     isrowb  = *rowb; iscolb = *colb;
5167     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5168     bseq[0] = *B_seq;
5169   }
5170   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5171   *B_seq = bseq[0];
5172   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5173   if (!rowb) {
5174     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5175   } else {
5176     *rowb = isrowb;
5177   }
5178   if (!colb) {
5179     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5180   } else {
5181     *colb = iscolb;
5182   }
5183   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5184   PetscFunctionReturn(0);
5185 }
5186 
5187 #undef __FUNCT__
5188 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5189 /*
5190     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns
5191     of the OFF-DIAGONAL portion of local A
5192 
5193     Collective on Mat
5194 
5195    Input Parameters:
5196 +    A,B - the matrices in mpiaij format
5197 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5198 
5199    Output Parameter:
5200 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5201 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5202 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5203 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5204 
5205     Level: developer
5206 
5207 */
5208 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5209 {
5210   VecScatter_MPI_General *gen_to,*gen_from;
5211   PetscErrorCode         ierr;
5212   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5213   Mat_SeqAIJ             *b_oth;
5214   VecScatter             ctx =a->Mvctx;
5215   MPI_Comm               comm;
5216   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5217   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5218   PetscScalar            *rvalues,*svalues;
5219   MatScalar              *b_otha,*bufa,*bufA;
5220   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5221   MPI_Request            *rwaits = NULL,*swaits = NULL;
5222   MPI_Status             *sstatus,rstatus;
5223   PetscMPIInt            jj;
5224   PetscInt               *cols,sbs,rbs;
5225   PetscScalar            *vals;
5226 
5227   PetscFunctionBegin;
5228   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5229   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5230     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5231   }
5232   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5233   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5234 
5235   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5236   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5237   rvalues  = gen_from->values; /* holds the length of receiving row */
5238   svalues  = gen_to->values;   /* holds the length of sending row */
5239   nrecvs   = gen_from->n;
5240   nsends   = gen_to->n;
5241 
5242   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5243   srow    = gen_to->indices;    /* local row index to be sent */
5244   sstarts = gen_to->starts;
5245   sprocs  = gen_to->procs;
5246   sstatus = gen_to->sstatus;
5247   sbs     = gen_to->bs;
5248   rstarts = gen_from->starts;
5249   rprocs  = gen_from->procs;
5250   rbs     = gen_from->bs;
5251 
5252   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5253   if (scall == MAT_INITIAL_MATRIX) {
5254     /* i-array */
5255     /*---------*/
5256     /*  post receives */
5257     for (i=0; i<nrecvs; i++) {
5258       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5259       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5260       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5261     }
5262 
5263     /* pack the outgoing message */
5264     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5265 
5266     sstartsj[0] = 0;
5267     rstartsj[0] = 0;
5268     len         = 0; /* total length of j or a array to be sent */
5269     k           = 0;
5270     for (i=0; i<nsends; i++) {
5271       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5272       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5273       for (j=0; j<nrows; j++) {
5274         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5275         for (l=0; l<sbs; l++) {
5276           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5277 
5278           rowlen[j*sbs+l] = ncols;
5279 
5280           len += ncols;
5281           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5282         }
5283         k++;
5284       }
5285       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5286 
5287       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5288     }
5289     /* recvs and sends of i-array are completed */
5290     i = nrecvs;
5291     while (i--) {
5292       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5293     }
5294     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5295 
5296     /* allocate buffers for sending j and a arrays */
5297     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5298     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5299 
5300     /* create i-array of B_oth */
5301     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5302 
5303     b_othi[0] = 0;
5304     len       = 0; /* total length of j or a array to be received */
5305     k         = 0;
5306     for (i=0; i<nrecvs; i++) {
5307       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5308       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5309       for (j=0; j<nrows; j++) {
5310         b_othi[k+1] = b_othi[k] + rowlen[j];
5311         len        += rowlen[j]; k++;
5312       }
5313       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5314     }
5315 
5316     /* allocate space for j and a arrrays of B_oth */
5317     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5318     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5319 
5320     /* j-array */
5321     /*---------*/
5322     /*  post receives of j-array */
5323     for (i=0; i<nrecvs; i++) {
5324       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5325       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5326     }
5327 
5328     /* pack the outgoing message j-array */
5329     k = 0;
5330     for (i=0; i<nsends; i++) {
5331       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5332       bufJ  = bufj+sstartsj[i];
5333       for (j=0; j<nrows; j++) {
5334         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5335         for (ll=0; ll<sbs; ll++) {
5336           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5337           for (l=0; l<ncols; l++) {
5338             *bufJ++ = cols[l];
5339           }
5340           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5341         }
5342       }
5343       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5344     }
5345 
5346     /* recvs and sends of j-array are completed */
5347     i = nrecvs;
5348     while (i--) {
5349       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5350     }
5351     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5352   } else if (scall == MAT_REUSE_MATRIX) {
5353     sstartsj = *startsj_s;
5354     rstartsj = *startsj_r;
5355     bufa     = *bufa_ptr;
5356     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5357     b_otha   = b_oth->a;
5358   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5359 
5360   /* a-array */
5361   /*---------*/
5362   /*  post receives of a-array */
5363   for (i=0; i<nrecvs; i++) {
5364     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5365     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5366   }
5367 
5368   /* pack the outgoing message a-array */
5369   k = 0;
5370   for (i=0; i<nsends; i++) {
5371     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5372     bufA  = bufa+sstartsj[i];
5373     for (j=0; j<nrows; j++) {
5374       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5375       for (ll=0; ll<sbs; ll++) {
5376         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5377         for (l=0; l<ncols; l++) {
5378           *bufA++ = vals[l];
5379         }
5380         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5381       }
5382     }
5383     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5384   }
5385   /* recvs and sends of a-array are completed */
5386   i = nrecvs;
5387   while (i--) {
5388     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5389   }
5390   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5391   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5392 
5393   if (scall == MAT_INITIAL_MATRIX) {
5394     /* put together the new matrix */
5395     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5396 
5397     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5398     /* Since these are PETSc arrays, change flags to free them as necessary. */
5399     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5400     b_oth->free_a  = PETSC_TRUE;
5401     b_oth->free_ij = PETSC_TRUE;
5402     b_oth->nonew   = 0;
5403 
5404     ierr = PetscFree(bufj);CHKERRQ(ierr);
5405     if (!startsj_s || !bufa_ptr) {
5406       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5407       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5408     } else {
5409       *startsj_s = sstartsj;
5410       *startsj_r = rstartsj;
5411       *bufa_ptr  = bufa;
5412     }
5413   }
5414   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5415   PetscFunctionReturn(0);
5416 }
5417 
5418 #undef __FUNCT__
5419 #define __FUNCT__ "MatGetCommunicationStructs"
5420 /*@C
5421   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5422 
5423   Not Collective
5424 
5425   Input Parameters:
5426 . A - The matrix in mpiaij format
5427 
5428   Output Parameter:
5429 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5430 . colmap - A map from global column index to local index into lvec
5431 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5432 
5433   Level: developer
5434 
5435 @*/
5436 #if defined(PETSC_USE_CTABLE)
5437 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5438 #else
5439 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5440 #endif
5441 {
5442   Mat_MPIAIJ *a;
5443 
5444   PetscFunctionBegin;
5445   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5446   PetscValidPointer(lvec, 2);
5447   PetscValidPointer(colmap, 3);
5448   PetscValidPointer(multScatter, 4);
5449   a = (Mat_MPIAIJ*) A->data;
5450   if (lvec) *lvec = a->lvec;
5451   if (colmap) *colmap = a->colmap;
5452   if (multScatter) *multScatter = a->Mvctx;
5453   PetscFunctionReturn(0);
5454 }
5455 
5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5457 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5459 
5460 #undef __FUNCT__
5461 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5462 /*
5463     Computes A*B as (B'*A')' since computing A*B directly is untenable
5464 
5465                n                       p                          p
5466         (              )       (              )         (                  )
5467       m (      A       )  *  n (       B      )   =   m (         C        )
5468         (              )       (              )         (                  )
5469 
5470 */
5471 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5472 {
5473   PetscErrorCode ierr;
5474   Mat            At,Bt,Ct;
5475 
5476   PetscFunctionBegin;
5477   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5478   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5479   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5480   ierr = MatDestroy(&At);CHKERRQ(ierr);
5481   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5482   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5483   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5484   PetscFunctionReturn(0);
5485 }
5486 
5487 #undef __FUNCT__
5488 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5489 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5490 {
5491   PetscErrorCode ierr;
5492   PetscInt       m=A->rmap->n,n=B->cmap->n;
5493   Mat            Cmat;
5494 
5495   PetscFunctionBegin;
5496   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5497   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5498   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5499   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5500   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5501   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5502   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5503   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5504 
5505   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5506 
5507   *C = Cmat;
5508   PetscFunctionReturn(0);
5509 }
5510 
5511 /* ----------------------------------------------------------------*/
5512 #undef __FUNCT__
5513 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5514 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5515 {
5516   PetscErrorCode ierr;
5517 
5518   PetscFunctionBegin;
5519   if (scall == MAT_INITIAL_MATRIX) {
5520     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5521     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5522     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5523   }
5524   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5525   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5526   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5527   PetscFunctionReturn(0);
5528 }
5529 
5530 #if defined(PETSC_HAVE_MUMPS)
5531 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5532 #endif
5533 #if defined(PETSC_HAVE_PASTIX)
5534 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5535 #endif
5536 #if defined(PETSC_HAVE_SUPERLU_DIST)
5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5538 #endif
5539 #if defined(PETSC_HAVE_CLIQUE)
5540 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5541 #endif
5542 
5543 /*MC
5544    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5545 
5546    Options Database Keys:
5547 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5548 
5549   Level: beginner
5550 
5551 .seealso: MatCreateAIJ()
5552 M*/
5553 
5554 #undef __FUNCT__
5555 #define __FUNCT__ "MatCreate_MPIAIJ"
5556 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5557 {
5558   Mat_MPIAIJ     *b;
5559   PetscErrorCode ierr;
5560   PetscMPIInt    size;
5561 
5562   PetscFunctionBegin;
5563   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5564 
5565   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5566   B->data       = (void*)b;
5567   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5568   B->assembled  = PETSC_FALSE;
5569   B->insertmode = NOT_SET_VALUES;
5570   b->size       = size;
5571 
5572   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5573 
5574   /* build cache for off array entries formed */
5575   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5576 
5577   b->donotstash  = PETSC_FALSE;
5578   b->colmap      = 0;
5579   b->garray      = 0;
5580   b->roworiented = PETSC_TRUE;
5581 
5582   /* stuff used for matrix vector multiply */
5583   b->lvec  = NULL;
5584   b->Mvctx = NULL;
5585 
5586   /* stuff for MatGetRow() */
5587   b->rowindices   = 0;
5588   b->rowvalues    = 0;
5589   b->getrowactive = PETSC_FALSE;
5590 
5591   /* flexible pointer used in CUSP/CUSPARSE classes */
5592   b->spptr = NULL;
5593 
5594 #if defined(PETSC_HAVE_MUMPS)
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5596 #endif
5597 #if defined(PETSC_HAVE_PASTIX)
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5599 #endif
5600 #if defined(PETSC_HAVE_SUPERLU_DIST)
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5602 #endif
5603 #if defined(PETSC_HAVE_CLIQUE)
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5605 #endif
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5620   PetscFunctionReturn(0);
5621 }
5622 
5623 #undef __FUNCT__
5624 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5625 /*@
5626      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5627          and "off-diagonal" part of the matrix in CSR format.
5628 
5629    Collective on MPI_Comm
5630 
5631    Input Parameters:
5632 +  comm - MPI communicator
5633 .  m - number of local rows (Cannot be PETSC_DECIDE)
5634 .  n - This value should be the same as the local size used in creating the
5635        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5636        calculated if N is given) For square matrices n is almost always m.
5637 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5638 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5639 .   i - row indices for "diagonal" portion of matrix
5640 .   j - column indices
5641 .   a - matrix values
5642 .   oi - row indices for "off-diagonal" portion of matrix
5643 .   oj - column indices
5644 -   oa - matrix values
5645 
5646    Output Parameter:
5647 .   mat - the matrix
5648 
5649    Level: advanced
5650 
5651    Notes:
5652        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5653        must free the arrays once the matrix has been destroyed and not before.
5654 
5655        The i and j indices are 0 based
5656 
5657        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5658 
5659        This sets local rows and cannot be used to set off-processor values.
5660 
5661        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5662        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5663        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5664        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5665        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5666        communication if it is known that only local entries will be set.
5667 
5668 .keywords: matrix, aij, compressed row, sparse, parallel
5669 
5670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5671           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5672 @*/
5673 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5674 {
5675   PetscErrorCode ierr;
5676   Mat_MPIAIJ     *maij;
5677 
5678   PetscFunctionBegin;
5679   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5680   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5681   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5682   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5683   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5684   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5685   maij = (Mat_MPIAIJ*) (*mat)->data;
5686 
5687   (*mat)->preallocated = PETSC_TRUE;
5688 
5689   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5690   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5691 
5692   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5693   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5694 
5695   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699 
5700   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5702   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5703   PetscFunctionReturn(0);
5704 }
5705 
5706 /*
5707     Special version for direct calls from Fortran
5708 */
5709 #include <petsc-private/fortranimpl.h>
5710 
5711 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5712 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5713 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5714 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5715 #endif
5716 
5717 /* Redefine these macros so they can be used in a void function */
5718 #undef CHKERRQ
5719 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5720 #undef SETERRQ2
5721 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5722 #undef SETERRQ3
5723 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5724 #undef SETERRQ
5725 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5726 
5727 #undef __FUNCT__
5728 #define __FUNCT__ "matsetvaluesmpiaij_"
5729 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5730 {
5731   Mat            mat  = *mmat;
5732   PetscInt       m    = *mm, n = *mn;
5733   InsertMode     addv = *maddv;
5734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5735   PetscScalar    value;
5736   PetscErrorCode ierr;
5737 
5738   MatCheckPreallocated(mat,1);
5739   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5740 
5741 #if defined(PETSC_USE_DEBUG)
5742   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5743 #endif
5744   {
5745     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5746     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5747     PetscBool roworiented = aij->roworiented;
5748 
5749     /* Some Variables required in the macro */
5750     Mat        A                 = aij->A;
5751     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5752     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5753     MatScalar  *aa               = a->a;
5754     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5755     Mat        B                 = aij->B;
5756     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5757     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5758     MatScalar  *ba               = b->a;
5759 
5760     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5761     PetscInt  nonew = a->nonew;
5762     MatScalar *ap1,*ap2;
5763 
5764     PetscFunctionBegin;
5765     for (i=0; i<m; i++) {
5766       if (im[i] < 0) continue;
5767 #if defined(PETSC_USE_DEBUG)
5768       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5769 #endif
5770       if (im[i] >= rstart && im[i] < rend) {
5771         row      = im[i] - rstart;
5772         lastcol1 = -1;
5773         rp1      = aj + ai[row];
5774         ap1      = aa + ai[row];
5775         rmax1    = aimax[row];
5776         nrow1    = ailen[row];
5777         low1     = 0;
5778         high1    = nrow1;
5779         lastcol2 = -1;
5780         rp2      = bj + bi[row];
5781         ap2      = ba + bi[row];
5782         rmax2    = bimax[row];
5783         nrow2    = bilen[row];
5784         low2     = 0;
5785         high2    = nrow2;
5786 
5787         for (j=0; j<n; j++) {
5788           if (roworiented) value = v[i*n+j];
5789           else value = v[i+j*m];
5790           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5791           if (in[j] >= cstart && in[j] < cend) {
5792             col = in[j] - cstart;
5793             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5794           } else if (in[j] < 0) continue;
5795 #if defined(PETSC_USE_DEBUG)
5796           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5797 #endif
5798           else {
5799             if (mat->was_assembled) {
5800               if (!aij->colmap) {
5801                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5802               }
5803 #if defined(PETSC_USE_CTABLE)
5804               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5805               col--;
5806 #else
5807               col = aij->colmap[in[j]] - 1;
5808 #endif
5809               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5810                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5811                 col  =  in[j];
5812                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5813                 B     = aij->B;
5814                 b     = (Mat_SeqAIJ*)B->data;
5815                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5816                 rp2   = bj + bi[row];
5817                 ap2   = ba + bi[row];
5818                 rmax2 = bimax[row];
5819                 nrow2 = bilen[row];
5820                 low2  = 0;
5821                 high2 = nrow2;
5822                 bm    = aij->B->rmap->n;
5823                 ba    = b->a;
5824               }
5825             } else col = in[j];
5826             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5827           }
5828         }
5829       } else if (!aij->donotstash) {
5830         if (roworiented) {
5831           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5832         } else {
5833           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5834         }
5835       }
5836     }
5837   }
5838   PetscFunctionReturnVoid();
5839 }
5840 
5841