xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 81bf3e96a9cdfe274002929fd2ad73122acb412b)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine number diagonal and off-diagonal counts */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine number diagonal and off-diagonal counts */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each processor
367 has an order N integer array but is fast to acess.
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled, if so we must
707      also disassemble ourselfs, in order that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscSF        sf;
765   PetscInt      *lrows;
766   PetscSFNode   *rrows;
767   PetscInt       r, p = 0, len = 0;
768   PetscErrorCode ierr;
769 
770   PetscFunctionBegin;
771   /* Create SF where leaves are input rows and roots are owned rows */
772   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
773   for (r = 0; r < n; ++r) lrows[r] = -1;
774   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
775   for (r = 0; r < N; ++r) {
776     const PetscInt idx   = rows[r];
777     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
778     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
779       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
780     }
781     if (A->nooffproczerorows) {
782       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
783       lrows[len++] = idx - owners[p];
784     } else {
785       rrows[r].rank = p;
786       rrows[r].index = rows[r] - owners[p];
787     }
788   }
789   if (!A->nooffproczerorows) {
790     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
791     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
792     /* Collect flags for rows to be zeroed */
793     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
794     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
795     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
796     /* Compress and put in row numbers */
797     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
798   }
799   /* fix right hand side if needed */
800   if (x && b) {
801     const PetscScalar *xx;
802     PetscScalar       *bb;
803 
804     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
805     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
806     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
807     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
808     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
809   }
810   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
811   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of off process part of matrix zeroing removed columns*/
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added in yy until the next line, */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_Redundant"
1121 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1122 {
1123   PetscErrorCode ierr;
1124   Mat_Redundant  *redund = *redundant;
1125   PetscInt       i;
1126 
1127   PetscFunctionBegin;
1128   *redundant = NULL;
1129   if (redund){
1130     if (redund->matseq) { /* via MatGetSubMatrices()  */
1131       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1132       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1133       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1134       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1135     } else {
1136       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1137       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1138       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1139       for (i=0; i<redund->nrecvs; i++) {
1140         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1141         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1142       }
1143       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1144     }
1145 
1146     if (redund->psubcomm) {
1147       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1148     }
1149     ierr = PetscFree(redund);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatDestroy_MPIAIJ"
1156 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1157 {
1158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162 #if defined(PETSC_USE_LOG)
1163   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1164 #endif
1165   ierr = MatDestroy_Redundant(&mat->redundant);CHKERRQ(ierr);
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1168   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1169   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1170 #if defined(PETSC_USE_CTABLE)
1171   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1177   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1191 #if defined(PETSC_HAVE_ELEMENTAL)
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1193 #endif
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1200 {
1201   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1202   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1203   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1204   PetscErrorCode ierr;
1205   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1206   int            fd;
1207   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1208   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1209   PetscScalar    *column_values;
1210   PetscInt       message_count,flowcontrolcount;
1211   FILE           *file;
1212 
1213   PetscFunctionBegin;
1214   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1215   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1216   nz   = A->nz + B->nz;
1217   if (!rank) {
1218     header[0] = MAT_FILE_CLASSID;
1219     header[1] = mat->rmap->N;
1220     header[2] = mat->cmap->N;
1221 
1222     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1224     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     /* get largest number of rows any processor has */
1226     rlen  = mat->rmap->n;
1227     range = mat->rmap->range;
1228     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1229   } else {
1230     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     rlen = mat->rmap->n;
1232   }
1233 
1234   /* load up the local row counts */
1235   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1236   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237 
1238   /* store the row lengths to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1251     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* th processor needs space a largest processor needs */
1258   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1260   cnt   = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ((col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1267     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1268   }
1269   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1270 
1271   /* store the column indices to the file */
1272   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1273   if (!rank) {
1274     MPI_Status status;
1275     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1279       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1280       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1282     }
1283     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1284   } else {
1285     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1286     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1289   }
1290   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1291 
1292   /* load up the local column values */
1293   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1294   cnt  = 0;
1295   for (i=0; i<mat->rmap->n; i++) {
1296     for (j=B->i[i]; j<B->i[i+1]; j++) {
1297       if (garray[B->j[j]] > cstart) break;
1298       column_values[cnt++] = B->a[j];
1299     }
1300     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1301     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1302   }
1303   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1304 
1305   /* store the column values to the file */
1306   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1307   if (!rank) {
1308     MPI_Status status;
1309     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1310     for (i=1; i<size; i++) {
1311       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1312       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1313       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1314       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1316     }
1317     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1318   } else {
1319     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1320     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1323   }
1324   ierr = PetscFree(column_values);CHKERRQ(ierr);
1325 
1326   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1327   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #include <petscdraw.h>
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1334 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1335 {
1336   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1337   PetscErrorCode    ierr;
1338   PetscMPIInt       rank = aij->rank,size = aij->size;
1339   PetscBool         isdraw,iascii,isbinary;
1340   PetscViewer       sviewer;
1341   PetscViewerFormat format;
1342 
1343   PetscFunctionBegin;
1344   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1345   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1346   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1347   if (iascii) {
1348     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1349     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1350       MatInfo   info;
1351       PetscBool inodes;
1352 
1353       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1354       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1356       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1357       if (!inodes) {
1358         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1359                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1360       } else {
1361         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1362                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1363       }
1364       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1366       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1367       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1368       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1370       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1371       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1372       PetscFunctionReturn(0);
1373     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1374       PetscInt inodecount,inodelimit,*inodes;
1375       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1376       if (inodes) {
1377         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1378       } else {
1379         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1380       }
1381       PetscFunctionReturn(0);
1382     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1383       PetscFunctionReturn(0);
1384     }
1385   } else if (isbinary) {
1386     if (size == 1) {
1387       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1388       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1389     } else {
1390       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1391     }
1392     PetscFunctionReturn(0);
1393   } else if (isdraw) {
1394     PetscDraw draw;
1395     PetscBool isnull;
1396     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1397     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1398   }
1399 
1400   {
1401     /* assemble the entire matrix onto first processor. */
1402     Mat        A;
1403     Mat_SeqAIJ *Aloc;
1404     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1405     MatScalar  *a;
1406 
1407     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1408     if (!rank) {
1409       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1410     } else {
1411       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1412     }
1413     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1414     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1415     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1416     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1417     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1418 
1419     /* copy over the A part */
1420     Aloc = (Mat_SeqAIJ*)aij->A->data;
1421     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1422     row  = mat->rmap->rstart;
1423     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1424     for (i=0; i<m; i++) {
1425       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1426       row++;
1427       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1428     }
1429     aj = Aloc->j;
1430     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1431 
1432     /* copy over the B part */
1433     Aloc = (Mat_SeqAIJ*)aij->B->data;
1434     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1435     row  = mat->rmap->rstart;
1436     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1437     ct   = cols;
1438     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1439     for (i=0; i<m; i++) {
1440       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1441       row++;
1442       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1443     }
1444     ierr = PetscFree(ct);CHKERRQ(ierr);
1445     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447     /*
1448        Everyone has to call to draw the matrix since the graphics waits are
1449        synchronized across all processors that share the PetscDraw object
1450     */
1451     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     if (!rank) {
1453       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1454     }
1455     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1456     ierr = MatDestroy(&A);CHKERRQ(ierr);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatView_MPIAIJ"
1463 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1464 {
1465   PetscErrorCode ierr;
1466   PetscBool      iascii,isdraw,issocket,isbinary;
1467 
1468   PetscFunctionBegin;
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1473   if (iascii || isdraw || isbinary || issocket) {
1474     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1475   }
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSOR_MPIAIJ"
1481 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1482 {
1483   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1484   PetscErrorCode ierr;
1485   Vec            bb1 = 0;
1486   PetscBool      hasop;
1487 
1488   PetscFunctionBegin;
1489   if (flag == SOR_APPLY_UPPER) {
1490     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491     PetscFunctionReturn(0);
1492   }
1493 
1494   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1495     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1496   }
1497 
1498   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503 
1504     while (its--) {
1505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507 
1508       /* update rhs: bb1 = bb - B*x */
1509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1511 
1512       /* local sweep */
1513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1514     }
1515   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520     while (its--) {
1521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523 
1524       /* update rhs: bb1 = bb - B*x */
1525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1527 
1528       /* local sweep */
1529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1530     }
1531   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1532     if (flag & SOR_ZERO_INITIAL_GUESS) {
1533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1534       its--;
1535     }
1536     while (its--) {
1537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539 
1540       /* update rhs: bb1 = bb - B*x */
1541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1543 
1544       /* local sweep */
1545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1546     }
1547   } else if (flag & SOR_EISENSTAT) {
1548     Vec xx1;
1549 
1550     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1551     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1552 
1553     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555     if (!mat->diag) {
1556       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1557       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1558     }
1559     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1560     if (hasop) {
1561       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1562     } else {
1563       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1564     }
1565     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1566 
1567     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1568 
1569     /* local sweep */
1570     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1571     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1572     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1573   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1574 
1575   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatPermute_MPIAIJ"
1581 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1582 {
1583   Mat            aA,aB,Aperm;
1584   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1585   PetscScalar    *aa,*ba;
1586   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1587   PetscSF        rowsf,sf;
1588   IS             parcolp = NULL;
1589   PetscBool      done;
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1594   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1595   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1596   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1597 
1598   /* Invert row permutation to find out where my rows should go */
1599   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1600   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1601   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1602   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1603   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1605 
1606   /* Invert column permutation to find out where my columns should go */
1607   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1608   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1609   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1610   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1611   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1614 
1615   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1616   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1617   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1618 
1619   /* Find out where my gcols should go */
1620   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1621   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1623   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1624   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1625   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1626   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1627   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1628 
1629   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1630   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1631   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1632   for (i=0; i<m; i++) {
1633     PetscInt row = rdest[i],rowner;
1634     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1635     for (j=ai[i]; j<ai[i+1]; j++) {
1636       PetscInt cowner,col = cdest[aj[j]];
1637       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1638       if (rowner == cowner) dnnz[i]++;
1639       else onnz[i]++;
1640     }
1641     for (j=bi[i]; j<bi[i+1]; j++) {
1642       PetscInt cowner,col = gcdest[bj[j]];
1643       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1644       if (rowner == cowner) dnnz[i]++;
1645       else onnz[i]++;
1646     }
1647   }
1648   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1650   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1651   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1652   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1653 
1654   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1655   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) {
1658     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1659     PetscInt j0,rowlen;
1660     rowlen = ai[i+1] - ai[i];
1661     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1662       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1663       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1664     }
1665     rowlen = bi[i+1] - bi[i];
1666     for (j0=j=0; j<rowlen; j0=j) {
1667       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1668       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1669     }
1670   }
1671   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1674   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1675   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1676   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1677   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1678   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1679   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1680   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1681   *B = Aperm;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1688 {
1689   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1690   Mat            A    = mat->A,B = mat->B;
1691   PetscErrorCode ierr;
1692   PetscReal      isend[5],irecv[5];
1693 
1694   PetscFunctionBegin;
1695   info->block_size = 1.0;
1696   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1697 
1698   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1699   isend[3] = info->memory;  isend[4] = info->mallocs;
1700 
1701   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1704   isend[3] += info->memory;  isend[4] += info->mallocs;
1705   if (flag == MAT_LOCAL) {
1706     info->nz_used      = isend[0];
1707     info->nz_allocated = isend[1];
1708     info->nz_unneeded  = isend[2];
1709     info->memory       = isend[3];
1710     info->mallocs      = isend[4];
1711   } else if (flag == MAT_GLOBAL_MAX) {
1712     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1713 
1714     info->nz_used      = irecv[0];
1715     info->nz_allocated = irecv[1];
1716     info->nz_unneeded  = irecv[2];
1717     info->memory       = irecv[3];
1718     info->mallocs      = irecv[4];
1719   } else if (flag == MAT_GLOBAL_SUM) {
1720     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1721 
1722     info->nz_used      = irecv[0];
1723     info->nz_allocated = irecv[1];
1724     info->nz_unneeded  = irecv[2];
1725     info->memory       = irecv[3];
1726     info->mallocs      = irecv[4];
1727   }
1728   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1729   info->fill_ratio_needed = 0;
1730   info->factor_mallocs    = 0;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatSetOption_MPIAIJ"
1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1737 {
1738   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   switch (op) {
1743   case MAT_NEW_NONZERO_LOCATIONS:
1744   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1745   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1746   case MAT_KEEP_NONZERO_PATTERN:
1747   case MAT_NEW_NONZERO_LOCATION_ERR:
1748   case MAT_USE_INODES:
1749   case MAT_IGNORE_ZERO_ENTRIES:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_ROW_ORIENTED:
1755     a->roworiented = flg;
1756 
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_NEW_DIAGONALS:
1761     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1762     break;
1763   case MAT_IGNORE_OFF_PROC_ENTRIES:
1764     a->donotstash = flg;
1765     break;
1766   case MAT_SPD:
1767     A->spd_set = PETSC_TRUE;
1768     A->spd     = flg;
1769     if (flg) {
1770       A->symmetric                  = PETSC_TRUE;
1771       A->structurally_symmetric     = PETSC_TRUE;
1772       A->symmetric_set              = PETSC_TRUE;
1773       A->structurally_symmetric_set = PETSC_TRUE;
1774     }
1775     break;
1776   case MAT_SYMMETRIC:
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     break;
1779   case MAT_STRUCTURALLY_SYMMETRIC:
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     break;
1782   case MAT_HERMITIAN:
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_SYMMETRY_ETERNAL:
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   default:
1789     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1790   }
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatGetRow_MPIAIJ"
1796 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1797 {
1798   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1799   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1800   PetscErrorCode ierr;
1801   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1802   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1803   PetscInt       *cmap,*idx_p;
1804 
1805   PetscFunctionBegin;
1806   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1807   mat->getrowactive = PETSC_TRUE;
1808 
1809   if (!mat->rowvalues && (idx || v)) {
1810     /*
1811         allocate enough space to hold information from the longest row.
1812     */
1813     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1814     PetscInt   max = 1,tmp;
1815     for (i=0; i<matin->rmap->n; i++) {
1816       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1817       if (max < tmp) max = tmp;
1818     }
1819     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1820   }
1821 
1822   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1823   lrow = row - rstart;
1824 
1825   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1826   if (!v)   {pvA = 0; pvB = 0;}
1827   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1828   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1829   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1830   nztot = nzA + nzB;
1831 
1832   cmap = mat->garray;
1833   if (v  || idx) {
1834     if (nztot) {
1835       /* Sort by increasing column numbers, assuming A and B already sorted */
1836       PetscInt imark = -1;
1837       if (v) {
1838         *v = v_p = mat->rowvalues;
1839         for (i=0; i<nzB; i++) {
1840           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1841           else break;
1842         }
1843         imark = i;
1844         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1845         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1846       }
1847       if (idx) {
1848         *idx = idx_p = mat->rowindices;
1849         if (imark > -1) {
1850           for (i=0; i<imark; i++) {
1851             idx_p[i] = cmap[cworkB[i]];
1852           }
1853         } else {
1854           for (i=0; i<nzB; i++) {
1855             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1856             else break;
1857           }
1858           imark = i;
1859         }
1860         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1861         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1862       }
1863     } else {
1864       if (idx) *idx = 0;
1865       if (v)   *v   = 0;
1866     }
1867   }
1868   *nz  = nztot;
1869   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1870   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1876 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1877 {
1878   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1879 
1880   PetscFunctionBegin;
1881   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1882   aij->getrowactive = PETSC_FALSE;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatNorm_MPIAIJ"
1888 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1889 {
1890   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1891   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1892   PetscErrorCode ierr;
1893   PetscInt       i,j,cstart = mat->cmap->rstart;
1894   PetscReal      sum = 0.0;
1895   MatScalar      *v;
1896 
1897   PetscFunctionBegin;
1898   if (aij->size == 1) {
1899     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1900   } else {
1901     if (type == NORM_FROBENIUS) {
1902       v = amat->a;
1903       for (i=0; i<amat->nz; i++) {
1904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1905       }
1906       v = bmat->a;
1907       for (i=0; i<bmat->nz; i++) {
1908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1909       }
1910       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1911       *norm = PetscSqrtReal(*norm);
1912     } else if (type == NORM_1) { /* max column norm */
1913       PetscReal *tmp,*tmp2;
1914       PetscInt  *jj,*garray = aij->garray;
1915       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1916       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1917       *norm = 0.0;
1918       v     = amat->a; jj = amat->j;
1919       for (j=0; j<amat->nz; j++) {
1920         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1921       }
1922       v = bmat->a; jj = bmat->j;
1923       for (j=0; j<bmat->nz; j++) {
1924         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1925       }
1926       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927       for (j=0; j<mat->cmap->N; j++) {
1928         if (tmp2[j] > *norm) *norm = tmp2[j];
1929       }
1930       ierr = PetscFree(tmp);CHKERRQ(ierr);
1931       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1932     } else if (type == NORM_INFINITY) { /* max row norm */
1933       PetscReal ntemp = 0.0;
1934       for (j=0; j<aij->A->rmap->n; j++) {
1935         v   = amat->a + amat->i[j];
1936         sum = 0.0;
1937         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1938           sum += PetscAbsScalar(*v); v++;
1939         }
1940         v = bmat->a + bmat->i[j];
1941         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1942           sum += PetscAbsScalar(*v); v++;
1943         }
1944         if (sum > ntemp) ntemp = sum;
1945       }
1946       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1947     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatTranspose_MPIAIJ"
1954 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1955 {
1956   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1957   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1958   PetscErrorCode ierr;
1959   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1960   PetscInt       cstart = A->cmap->rstart,ncol;
1961   Mat            B;
1962   MatScalar      *array;
1963 
1964   PetscFunctionBegin;
1965   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1966 
1967   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1968   ai = Aloc->i; aj = Aloc->j;
1969   bi = Bloc->i; bj = Bloc->j;
1970   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1971     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1972     PetscSFNode          *oloc;
1973     PETSC_UNUSED PetscSF sf;
1974 
1975     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1976     /* compute d_nnz for preallocation */
1977     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1978     for (i=0; i<ai[ma]; i++) {
1979       d_nnz[aj[i]]++;
1980       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1981     }
1982     /* compute local off-diagonal contributions */
1983     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1984     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1985     /* map those to global */
1986     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1987     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1988     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1989     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1990     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1991     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1992     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1993 
1994     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1995     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1996     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1997     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1998     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1999     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2000   } else {
2001     B    = *matout;
2002     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2003     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2004   }
2005 
2006   /* copy over the A part */
2007   array = Aloc->a;
2008   row   = A->rmap->rstart;
2009   for (i=0; i<ma; i++) {
2010     ncol = ai[i+1]-ai[i];
2011     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2012     row++;
2013     array += ncol; aj += ncol;
2014   }
2015   aj = Aloc->j;
2016   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2017 
2018   /* copy over the B part */
2019   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2020   array = Bloc->a;
2021   row   = A->rmap->rstart;
2022   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2023   cols_tmp = cols;
2024   for (i=0; i<mb; i++) {
2025     ncol = bi[i+1]-bi[i];
2026     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2027     row++;
2028     array += ncol; cols_tmp += ncol;
2029   }
2030   ierr = PetscFree(cols);CHKERRQ(ierr);
2031 
2032   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2034   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2035     *matout = B;
2036   } else {
2037     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2038   }
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2044 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2045 {
2046   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2047   Mat            a    = aij->A,b = aij->B;
2048   PetscErrorCode ierr;
2049   PetscInt       s1,s2,s3;
2050 
2051   PetscFunctionBegin;
2052   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2053   if (rr) {
2054     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2055     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2056     /* Overlap communication with computation. */
2057     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2058   }
2059   if (ll) {
2060     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2061     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2062     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2063   }
2064   /* scale  the diagonal block */
2065   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2066 
2067   if (rr) {
2068     /* Do a scatter end and then right scale the off-diagonal block */
2069     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2070     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2078 {
2079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatEqual_MPIAIJ"
2089 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2090 {
2091   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2092   Mat            a,b,c,d;
2093   PetscBool      flg;
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097   a = matA->A; b = matA->B;
2098   c = matB->A; d = matB->B;
2099 
2100   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2101   if (flg) {
2102     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2103   }
2104   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatCopy_MPIAIJ"
2110 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2111 {
2112   PetscErrorCode ierr;
2113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2114   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2115 
2116   PetscFunctionBegin;
2117   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2118   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2119     /* because of the column compression in the off-processor part of the matrix a->B,
2120        the number of columns in a->B and b->B may be different, hence we cannot call
2121        the MatCopy() directly on the two parts. If need be, we can provide a more
2122        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2123        then copying the submatrices */
2124     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2125   } else {
2126     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2127     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatSetUp_MPIAIJ"
2134 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2135 {
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 /*
2144    Computes the number of nonzeros per row needed for preallocation when X and Y
2145    have different nonzero structure.
2146 */
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2149 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2150 {
2151   PetscInt       i,j,k,nzx,nzy;
2152 
2153   PetscFunctionBegin;
2154   /* Set the number of nonzeros in the new matrix */
2155   for (i=0; i<m; i++) {
2156     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2157     nzx = xi[i+1] - xi[i];
2158     nzy = yi[i+1] - yi[i];
2159     nnz[i] = 0;
2160     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2161       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2162       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2163       nnz[i]++;
2164     }
2165     for (; k<nzy; k++) nnz[i]++;
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2173 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2174 {
2175   PetscErrorCode ierr;
2176   PetscInt       m = Y->rmap->N;
2177   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2178   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2179 
2180   PetscFunctionBegin;
2181   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatAXPY_MPIAIJ"
2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   PetscInt       i;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x    = (Mat_SeqAIJ*)xx->A->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     y    = (Mat_SeqAIJ*)yy->A->data;
2201     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2202     x    = (Mat_SeqAIJ*)xx->B->data;
2203     y    = (Mat_SeqAIJ*)yy->B->data;
2204     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2205     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2206     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2207   } else if (str == SUBSET_NONZERO_PATTERN) {
2208     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2209 
2210     x = (Mat_SeqAIJ*)xx->B->data;
2211     y = (Mat_SeqAIJ*)yy->B->data;
2212     if (y->xtoy && y->XtoY != xx->B) {
2213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2214       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2215     }
2216     if (!y->xtoy) { /* get xtoy */
2217       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2218       y->XtoY = xx->B;
2219       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2220     }
2221     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2222     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2223   } else {
2224     Mat      B;
2225     PetscInt *nnz_d,*nnz_o;
2226     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2227     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2228     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2229     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2230     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2231     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2232     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2233     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2234     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2235     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2236     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2237     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2238     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2239     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2240   }
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatConjugate_MPIAIJ"
2248 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2249 {
2250 #if defined(PETSC_USE_COMPLEX)
2251   PetscErrorCode ierr;
2252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2253 
2254   PetscFunctionBegin;
2255   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2256   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2257 #else
2258   PetscFunctionBegin;
2259 #endif
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatRealPart_MPIAIJ"
2265 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2272   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #undef __FUNCT__
2277 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #if defined(PETSC_HAVE_PBGL)
2290 
2291 #include <boost/parallel/mpi/bsp_process_group.hpp>
2292 #include <boost/graph/distributed/ilu_default_graph.hpp>
2293 #include <boost/graph/distributed/ilu_0_block.hpp>
2294 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295 #include <boost/graph/distributed/petsc/interface.hpp>
2296 #include <boost/multi_array.hpp>
2297 #include <boost/parallel/distributed_property_map->hpp>
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2301 /*
2302   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303 */
2304 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305 {
2306   namespace petsc = boost::distributed::petsc;
2307 
2308   namespace graph_dist = boost::graph::distributed;
2309   using boost::graph::distributed::ilu_default::process_group_type;
2310   using boost::graph::ilu_permuted;
2311 
2312   PetscBool      row_identity, col_identity;
2313   PetscContainer c;
2314   PetscInt       m, n, M, N;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2320   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2321   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2322 
2323   process_group_type pg;
2324   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2325   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326   lgraph_type& level_graph = *lgraph_p;
2327   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2328 
2329   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330   ilu_permuted(level_graph);
2331 
2332   /* put together the new matrix */
2333   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2334   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2338   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2339   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341 
2342   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2343   ierr = PetscContainerSetPointer(c, lgraph_p);
2344   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2345   ierr = PetscContainerDestroy(&c);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2351 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2352 {
2353   PetscFunctionBegin;
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatSolve_MPIAIJ"
2359 /*
2360   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2361 */
2362 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2363 {
2364   namespace graph_dist = boost::graph::distributed;
2365 
2366   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2367   lgraph_type    *lgraph_p;
2368   PetscContainer c;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2373   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2374   ierr = VecCopy(b, x);CHKERRQ(ierr);
2375 
2376   PetscScalar *array_x;
2377   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2378   PetscInt sx;
2379   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2380 
2381   PetscScalar *array_b;
2382   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2383   PetscInt sb;
2384   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2385 
2386   lgraph_type& level_graph = *lgraph_p;
2387   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2388 
2389   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2390   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2391   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2392 
2393   typedef boost::iterator_property_map<array_ref_type::iterator,
2394                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2395   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2396   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2397 
2398   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2399   PetscFunctionReturn(0);
2400 }
2401 #endif
2402 
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2406 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2407 {
2408   PetscMPIInt    rank,size;
2409   MPI_Comm       comm;
2410   PetscErrorCode ierr;
2411   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2412   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2413   PetscInt       *rowrange = mat->rmap->range;
2414   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2415   Mat            A = aij->A,B=aij->B,C=*matredundant;
2416   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2417   PetscScalar    *sbuf_a;
2418   PetscInt       nzlocal=a->nz+b->nz;
2419   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2420   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2421   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2422   MatScalar      *aworkA,*aworkB;
2423   PetscScalar    *vals;
2424   PetscMPIInt    tag1,tag2,tag3,imdex;
2425   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2426   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2427   MPI_Status     recv_status,*send_status;
2428   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2429   PetscInt       **rbuf_j=NULL;
2430   PetscScalar    **rbuf_a=NULL;
2431   Mat_Redundant  *redund =NULL;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2435   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2436   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2437   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2438   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2439 
2440   if (reuse == MAT_REUSE_MATRIX) {
2441     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2442 
2443     redund = C->redundant;
2444     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2445 
2446     nsends    = redund->nsends;
2447     nrecvs    = redund->nrecvs;
2448     send_rank = redund->send_rank;
2449     recv_rank = redund->recv_rank;
2450     sbuf_nz   = redund->sbuf_nz;
2451     rbuf_nz   = redund->rbuf_nz;
2452     sbuf_j    = redund->sbuf_j;
2453     sbuf_a    = redund->sbuf_a;
2454     rbuf_j    = redund->rbuf_j;
2455     rbuf_a    = redund->rbuf_a;
2456   }
2457 
2458   if (reuse == MAT_INITIAL_MATRIX) {
2459     PetscInt    nleftover,np_subcomm;
2460 
2461     /* get the destination processors' id send_rank, nsends and nrecvs */
2462     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2463 
2464     np_subcomm = size/nsubcomm;
2465     nleftover  = size - nsubcomm*np_subcomm;
2466 
2467     /* block of codes below is specific for INTERLACED */
2468     /* ------------------------------------------------*/
2469     nsends = 0; nrecvs = 0;
2470     for (i=0; i<size; i++) {
2471       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2472         send_rank[nsends++] = i;
2473         recv_rank[nrecvs++] = i;
2474       }
2475     }
2476     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2477       i = size-nleftover-1;
2478       j = 0;
2479       while (j < nsubcomm - nleftover) {
2480         send_rank[nsends++] = i;
2481         i--; j++;
2482       }
2483     }
2484 
2485     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2486       for (i=0; i<nleftover; i++) {
2487         recv_rank[nrecvs++] = size-nleftover+i;
2488       }
2489     }
2490     /*----------------------------------------------*/
2491 
2492     /* allocate sbuf_j, sbuf_a */
2493     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2494     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2495     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2496     /*
2497     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2498     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2499      */
2500   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2501 
2502   /* copy mat's local entries into the buffers */
2503   if (reuse == MAT_INITIAL_MATRIX) {
2504     rownz_max = 0;
2505     rptr      = sbuf_j;
2506     cols      = sbuf_j + rend-rstart + 1;
2507     vals      = sbuf_a;
2508     rptr[0]   = 0;
2509     for (i=0; i<rend-rstart; i++) {
2510       row    = i + rstart;
2511       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2512       ncols  = nzA + nzB;
2513       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2514       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2515       /* load the column indices for this row into cols */
2516       lwrite = 0;
2517       for (l=0; l<nzB; l++) {
2518         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2519           vals[lwrite]   = aworkB[l];
2520           cols[lwrite++] = ctmp;
2521         }
2522       }
2523       for (l=0; l<nzA; l++) {
2524         vals[lwrite]   = aworkA[l];
2525         cols[lwrite++] = cstart + cworkA[l];
2526       }
2527       for (l=0; l<nzB; l++) {
2528         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2529           vals[lwrite]   = aworkB[l];
2530           cols[lwrite++] = ctmp;
2531         }
2532       }
2533       vals     += ncols;
2534       cols     += ncols;
2535       rptr[i+1] = rptr[i] + ncols;
2536       if (rownz_max < ncols) rownz_max = ncols;
2537     }
2538     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2539   } else { /* only copy matrix values into sbuf_a */
2540     rptr    = sbuf_j;
2541     vals    = sbuf_a;
2542     rptr[0] = 0;
2543     for (i=0; i<rend-rstart; i++) {
2544       row    = i + rstart;
2545       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2546       ncols  = nzA + nzB;
2547       cworkB = b->j + b->i[i];
2548       aworkA = a->a + a->i[i];
2549       aworkB = b->a + b->i[i];
2550       lwrite = 0;
2551       for (l=0; l<nzB; l++) {
2552         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2553       }
2554       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2555       for (l=0; l<nzB; l++) {
2556         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2557       }
2558       vals     += ncols;
2559       rptr[i+1] = rptr[i] + ncols;
2560     }
2561   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2562 
2563   /* send nzlocal to others, and recv other's nzlocal */
2564   /*--------------------------------------------------*/
2565   if (reuse == MAT_INITIAL_MATRIX) {
2566     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2567 
2568     s_waits2 = s_waits3 + nsends;
2569     s_waits1 = s_waits2 + nsends;
2570     r_waits1 = s_waits1 + nsends;
2571     r_waits2 = r_waits1 + nrecvs;
2572     r_waits3 = r_waits2 + nrecvs;
2573   } else {
2574     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2575 
2576     r_waits3 = s_waits3 + nsends;
2577   }
2578 
2579   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2580   if (reuse == MAT_INITIAL_MATRIX) {
2581     /* get new tags to keep the communication clean */
2582     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2583     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2584     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2585 
2586     /* post receives of other's nzlocal */
2587     for (i=0; i<nrecvs; i++) {
2588       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2589     }
2590     /* send nzlocal to others */
2591     for (i=0; i<nsends; i++) {
2592       sbuf_nz[i] = nzlocal;
2593       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2594     }
2595     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2596     count = nrecvs;
2597     while (count) {
2598       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2599 
2600       recv_rank[imdex] = recv_status.MPI_SOURCE;
2601       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2602       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2603 
2604       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2605 
2606       rbuf_nz[imdex] += i + 2;
2607 
2608       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2609       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2610       count--;
2611     }
2612     /* wait on sends of nzlocal */
2613     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2614     /* send mat->i,j to others, and recv from other's */
2615     /*------------------------------------------------*/
2616     for (i=0; i<nsends; i++) {
2617       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2618       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2619     }
2620     /* wait on receives of mat->i,j */
2621     /*------------------------------*/
2622     count = nrecvs;
2623     while (count) {
2624       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2625       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2626       count--;
2627     }
2628     /* wait on sends of mat->i,j */
2629     /*---------------------------*/
2630     if (nsends) {
2631       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2632     }
2633   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2634 
2635   /* post receives, send and receive mat->a */
2636   /*----------------------------------------*/
2637   for (imdex=0; imdex<nrecvs; imdex++) {
2638     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2639   }
2640   for (i=0; i<nsends; i++) {
2641     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2642   }
2643   count = nrecvs;
2644   while (count) {
2645     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2646     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2647     count--;
2648   }
2649   if (nsends) {
2650     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2651   }
2652 
2653   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2654 
2655   /* create redundant matrix */
2656   /*-------------------------*/
2657   if (reuse == MAT_INITIAL_MATRIX) {
2658     const PetscInt *range;
2659     PetscInt       rstart_sub,rend_sub,mloc_sub;
2660 
2661     /* compute rownz_max for preallocation */
2662     for (imdex=0; imdex<nrecvs; imdex++) {
2663       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2664       rptr = rbuf_j[imdex];
2665       for (i=0; i<j; i++) {
2666         ncols = rptr[i+1] - rptr[i];
2667         if (rownz_max < ncols) rownz_max = ncols;
2668       }
2669     }
2670 
2671     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2672 
2673     /* get local size of redundant matrix
2674        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2675     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2676     rstart_sub = range[nsubcomm*subrank];
2677     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2678       rend_sub = range[nsubcomm*(subrank+1)];
2679     } else {
2680       rend_sub = mat->rmap->N;
2681     }
2682     mloc_sub = rend_sub - rstart_sub;
2683 
2684     if (M == N) {
2685       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2686     } else { /* non-square matrix */
2687       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2688     }
2689     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2690     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2691     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2692     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2693   } else {
2694     C = *matredundant;
2695   }
2696 
2697   /* insert local matrix entries */
2698   rptr = sbuf_j;
2699   cols = sbuf_j + rend-rstart + 1;
2700   vals = sbuf_a;
2701   for (i=0; i<rend-rstart; i++) {
2702     row   = i + rstart;
2703     ncols = rptr[i+1] - rptr[i];
2704     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2705     vals += ncols;
2706     cols += ncols;
2707   }
2708   /* insert received matrix entries */
2709   for (imdex=0; imdex<nrecvs; imdex++) {
2710     rstart = rowrange[recv_rank[imdex]];
2711     rend   = rowrange[recv_rank[imdex]+1];
2712     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2713     rptr   = rbuf_j[imdex];
2714     cols   = rbuf_j[imdex] + rend-rstart + 1;
2715     vals   = rbuf_a[imdex];
2716     for (i=0; i<rend-rstart; i++) {
2717       row   = i + rstart;
2718       ncols = rptr[i+1] - rptr[i];
2719       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2720       vals += ncols;
2721       cols += ncols;
2722     }
2723   }
2724   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2726 
2727   if (reuse == MAT_INITIAL_MATRIX) {
2728     *matredundant = C;
2729 
2730     /* create a supporting struct and attach it to C for reuse */
2731     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2732     C->redundant      = redund;
2733     redund->nzlocal   = nzlocal;
2734     redund->nsends    = nsends;
2735     redund->nrecvs    = nrecvs;
2736     redund->send_rank = send_rank;
2737     redund->recv_rank = recv_rank;
2738     redund->sbuf_nz   = sbuf_nz;
2739     redund->rbuf_nz   = rbuf_nz;
2740     redund->sbuf_j    = sbuf_j;
2741     redund->sbuf_a    = sbuf_a;
2742     redund->rbuf_j    = rbuf_j;
2743     redund->rbuf_a    = rbuf_a;
2744     redund->psubcomm  = NULL;
2745   }
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2751 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2752 {
2753   PetscErrorCode ierr;
2754   MPI_Comm       comm;
2755   PetscMPIInt    size,subsize;
2756   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2757   Mat_Redundant  *redund=NULL;
2758   PetscSubcomm   psubcomm=NULL;
2759   MPI_Comm       subcomm_in=subcomm;
2760   Mat            *matseq;
2761   IS             isrow,iscol;
2762 
2763   PetscFunctionBegin;
2764   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2765     if (reuse ==  MAT_INITIAL_MATRIX) {
2766       /* create psubcomm, then get subcomm */
2767       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2768       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2769       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2770 
2771       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2772       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2773       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2774       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2775       subcomm = psubcomm->comm;
2776     } else { /* retrieve psubcomm and subcomm */
2777       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2778       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2779       redund   = (*matredundant)->redundant;
2780       psubcomm = redund->psubcomm;
2781     }
2782     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2783       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2784       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2785         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2786         (*matredundant)->redundant->psubcomm = psubcomm;
2787       }
2788       PetscFunctionReturn(0);
2789     }
2790   }
2791 
2792   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2793   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2794   if (reuse == MAT_INITIAL_MATRIX) {
2795     /* create a local sequential matrix matseq[0] */
2796     mloc_sub = PETSC_DECIDE;
2797     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2798     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2799     rstart = rend - mloc_sub;
2800     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2801     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2802   } else { /* reuse == MAT_REUSE_MATRIX */
2803     redund = (*matredundant)->redundant;
2804     isrow  = redund->isrow;
2805     iscol  = redund->iscol;
2806     matseq = redund->matseq;
2807   }
2808   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2809   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2810 
2811   if (reuse == MAT_INITIAL_MATRIX) {
2812     /* create a supporting struct and attach it to C for reuse */
2813     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2814     (*matredundant)->redundant = redund;
2815     redund->isrow              = isrow;
2816     redund->iscol              = iscol;
2817     redund->matseq             = matseq;
2818     redund->psubcomm           = psubcomm;
2819   }
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 #undef __FUNCT__
2824 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2825 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2826 {
2827   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2828   PetscErrorCode ierr;
2829   PetscInt       i,*idxb = 0;
2830   PetscScalar    *va,*vb;
2831   Vec            vtmp;
2832 
2833   PetscFunctionBegin;
2834   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2835   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2836   if (idx) {
2837     for (i=0; i<A->rmap->n; i++) {
2838       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2839     }
2840   }
2841 
2842   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2843   if (idx) {
2844     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2845   }
2846   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2847   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2848 
2849   for (i=0; i<A->rmap->n; i++) {
2850     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2851       va[i] = vb[i];
2852       if (idx) idx[i] = a->garray[idxb[i]];
2853     }
2854   }
2855 
2856   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2857   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2858   ierr = PetscFree(idxb);CHKERRQ(ierr);
2859   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2865 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2866 {
2867   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2868   PetscErrorCode ierr;
2869   PetscInt       i,*idxb = 0;
2870   PetscScalar    *va,*vb;
2871   Vec            vtmp;
2872 
2873   PetscFunctionBegin;
2874   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2875   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2876   if (idx) {
2877     for (i=0; i<A->cmap->n; i++) {
2878       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2879     }
2880   }
2881 
2882   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2883   if (idx) {
2884     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2885   }
2886   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2887   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2888 
2889   for (i=0; i<A->rmap->n; i++) {
2890     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2891       va[i] = vb[i];
2892       if (idx) idx[i] = a->garray[idxb[i]];
2893     }
2894   }
2895 
2896   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2897   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2898   ierr = PetscFree(idxb);CHKERRQ(ierr);
2899   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2900   PetscFunctionReturn(0);
2901 }
2902 
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2905 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2906 {
2907   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2908   PetscInt       n      = A->rmap->n;
2909   PetscInt       cstart = A->cmap->rstart;
2910   PetscInt       *cmap  = mat->garray;
2911   PetscInt       *diagIdx, *offdiagIdx;
2912   Vec            diagV, offdiagV;
2913   PetscScalar    *a, *diagA, *offdiagA;
2914   PetscInt       r;
2915   PetscErrorCode ierr;
2916 
2917   PetscFunctionBegin;
2918   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2919   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2920   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2921   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2922   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2923   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2924   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2925   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2926   for (r = 0; r < n; ++r) {
2927     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2928       a[r]   = diagA[r];
2929       idx[r] = cstart + diagIdx[r];
2930     } else {
2931       a[r]   = offdiagA[r];
2932       idx[r] = cmap[offdiagIdx[r]];
2933     }
2934   }
2935   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2936   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2937   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2938   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2939   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2940   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 #undef __FUNCT__
2945 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2946 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2947 {
2948   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2949   PetscInt       n      = A->rmap->n;
2950   PetscInt       cstart = A->cmap->rstart;
2951   PetscInt       *cmap  = mat->garray;
2952   PetscInt       *diagIdx, *offdiagIdx;
2953   Vec            diagV, offdiagV;
2954   PetscScalar    *a, *diagA, *offdiagA;
2955   PetscInt       r;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2960   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2961   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2962   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2963   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2964   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2965   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2966   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2967   for (r = 0; r < n; ++r) {
2968     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2969       a[r]   = diagA[r];
2970       idx[r] = cstart + diagIdx[r];
2971     } else {
2972       a[r]   = offdiagA[r];
2973       idx[r] = cmap[offdiagIdx[r]];
2974     }
2975   }
2976   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2977   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2978   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2979   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2980   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2981   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2987 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2988 {
2989   PetscErrorCode ierr;
2990   Mat            *dummy;
2991 
2992   PetscFunctionBegin;
2993   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2994   *newmat = *dummy;
2995   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3001 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3002 {
3003   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3004   PetscErrorCode ierr;
3005 
3006   PetscFunctionBegin;
3007   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3013 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3014 {
3015   PetscErrorCode ierr;
3016   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3017 
3018   PetscFunctionBegin;
3019   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3020   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3021   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3022   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3023   PetscFunctionReturn(0);
3024 }
3025 
3026 /* -------------------------------------------------------------------*/
3027 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3028                                        MatGetRow_MPIAIJ,
3029                                        MatRestoreRow_MPIAIJ,
3030                                        MatMult_MPIAIJ,
3031                                 /* 4*/ MatMultAdd_MPIAIJ,
3032                                        MatMultTranspose_MPIAIJ,
3033                                        MatMultTransposeAdd_MPIAIJ,
3034 #if defined(PETSC_HAVE_PBGL)
3035                                        MatSolve_MPIAIJ,
3036 #else
3037                                        0,
3038 #endif
3039                                        0,
3040                                        0,
3041                                 /*10*/ 0,
3042                                        0,
3043                                        0,
3044                                        MatSOR_MPIAIJ,
3045                                        MatTranspose_MPIAIJ,
3046                                 /*15*/ MatGetInfo_MPIAIJ,
3047                                        MatEqual_MPIAIJ,
3048                                        MatGetDiagonal_MPIAIJ,
3049                                        MatDiagonalScale_MPIAIJ,
3050                                        MatNorm_MPIAIJ,
3051                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3052                                        MatAssemblyEnd_MPIAIJ,
3053                                        MatSetOption_MPIAIJ,
3054                                        MatZeroEntries_MPIAIJ,
3055                                 /*24*/ MatZeroRows_MPIAIJ,
3056                                        0,
3057 #if defined(PETSC_HAVE_PBGL)
3058                                        0,
3059 #else
3060                                        0,
3061 #endif
3062                                        0,
3063                                        0,
3064                                 /*29*/ MatSetUp_MPIAIJ,
3065 #if defined(PETSC_HAVE_PBGL)
3066                                        0,
3067 #else
3068                                        0,
3069 #endif
3070                                        0,
3071                                        0,
3072                                        0,
3073                                 /*34*/ MatDuplicate_MPIAIJ,
3074                                        0,
3075                                        0,
3076                                        0,
3077                                        0,
3078                                 /*39*/ MatAXPY_MPIAIJ,
3079                                        MatGetSubMatrices_MPIAIJ,
3080                                        MatIncreaseOverlap_MPIAIJ,
3081                                        MatGetValues_MPIAIJ,
3082                                        MatCopy_MPIAIJ,
3083                                 /*44*/ MatGetRowMax_MPIAIJ,
3084                                        MatScale_MPIAIJ,
3085                                        0,
3086                                        MatDiagonalSet_MPIAIJ,
3087                                        MatZeroRowsColumns_MPIAIJ,
3088                                 /*49*/ MatSetRandom_MPIAIJ,
3089                                        0,
3090                                        0,
3091                                        0,
3092                                        0,
3093                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3094                                        0,
3095                                        MatSetUnfactored_MPIAIJ,
3096                                        MatPermute_MPIAIJ,
3097                                        0,
3098                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3099                                        MatDestroy_MPIAIJ,
3100                                        MatView_MPIAIJ,
3101                                        0,
3102                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3103                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3104                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3105                                        0,
3106                                        0,
3107                                        0,
3108                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3109                                        MatGetRowMinAbs_MPIAIJ,
3110                                        0,
3111                                        MatSetColoring_MPIAIJ,
3112                                        0,
3113                                        MatSetValuesAdifor_MPIAIJ,
3114                                 /*75*/ MatFDColoringApply_AIJ,
3115                                        0,
3116                                        0,
3117                                        0,
3118                                        MatFindZeroDiagonals_MPIAIJ,
3119                                 /*80*/ 0,
3120                                        0,
3121                                        0,
3122                                 /*83*/ MatLoad_MPIAIJ,
3123                                        0,
3124                                        0,
3125                                        0,
3126                                        0,
3127                                        0,
3128                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3129                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3130                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3131                                        MatPtAP_MPIAIJ_MPIAIJ,
3132                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3133                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3134                                        0,
3135                                        0,
3136                                        0,
3137                                        0,
3138                                 /*99*/ 0,
3139                                        0,
3140                                        0,
3141                                        MatConjugate_MPIAIJ,
3142                                        0,
3143                                 /*104*/MatSetValuesRow_MPIAIJ,
3144                                        MatRealPart_MPIAIJ,
3145                                        MatImaginaryPart_MPIAIJ,
3146                                        0,
3147                                        0,
3148                                 /*109*/0,
3149                                        MatGetRedundantMatrix_MPIAIJ,
3150                                        MatGetRowMin_MPIAIJ,
3151                                        0,
3152                                        0,
3153                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3154                                        0,
3155                                        0,
3156                                        0,
3157                                        0,
3158                                 /*119*/0,
3159                                        0,
3160                                        0,
3161                                        0,
3162                                        MatGetMultiProcBlock_MPIAIJ,
3163                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3164                                        MatGetColumnNorms_MPIAIJ,
3165                                        MatInvertBlockDiagonal_MPIAIJ,
3166                                        0,
3167                                        MatGetSubMatricesParallel_MPIAIJ,
3168                                 /*129*/0,
3169                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3170                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3171                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3172                                        0,
3173                                 /*134*/0,
3174                                        0,
3175                                        0,
3176                                        0,
3177                                        0,
3178                                 /*139*/0,
3179                                        0,
3180                                        0,
3181                                        MatFDColoringSetUp_MPIXAIJ
3182 };
3183 
3184 /* ----------------------------------------------------------------------------------------*/
3185 
3186 #undef __FUNCT__
3187 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3188 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3189 {
3190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3191   PetscErrorCode ierr;
3192 
3193   PetscFunctionBegin;
3194   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3195   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3196   PetscFunctionReturn(0);
3197 }
3198 
3199 #undef __FUNCT__
3200 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3201 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3202 {
3203   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3204   PetscErrorCode ierr;
3205 
3206   PetscFunctionBegin;
3207   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3208   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3214 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3215 {
3216   Mat_MPIAIJ     *b;
3217   PetscErrorCode ierr;
3218 
3219   PetscFunctionBegin;
3220   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3221   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3222   b = (Mat_MPIAIJ*)B->data;
3223 
3224   if (!B->preallocated) {
3225     /* Explicitly create 2 MATSEQAIJ matrices. */
3226     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3227     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3228     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3229     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3230     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3231     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3232     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3233     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3234     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3235     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3236   }
3237 
3238   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3239   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3240   B->preallocated = PETSC_TRUE;
3241   PetscFunctionReturn(0);
3242 }
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3246 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3247 {
3248   Mat            mat;
3249   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3250   PetscErrorCode ierr;
3251 
3252   PetscFunctionBegin;
3253   *newmat = 0;
3254   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3255   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3256   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3257   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3258   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3259   a       = (Mat_MPIAIJ*)mat->data;
3260 
3261   mat->factortype   = matin->factortype;
3262   mat->assembled    = PETSC_TRUE;
3263   mat->insertmode   = NOT_SET_VALUES;
3264   mat->preallocated = PETSC_TRUE;
3265 
3266   a->size         = oldmat->size;
3267   a->rank         = oldmat->rank;
3268   a->donotstash   = oldmat->donotstash;
3269   a->roworiented  = oldmat->roworiented;
3270   a->rowindices   = 0;
3271   a->rowvalues    = 0;
3272   a->getrowactive = PETSC_FALSE;
3273 
3274   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3275   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3276 
3277   if (oldmat->colmap) {
3278 #if defined(PETSC_USE_CTABLE)
3279     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3280 #else
3281     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3282     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3283     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3284 #endif
3285   } else a->colmap = 0;
3286   if (oldmat->garray) {
3287     PetscInt len;
3288     len  = oldmat->B->cmap->n;
3289     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3290     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3291     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3292   } else a->garray = 0;
3293 
3294   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3295   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3296   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3297   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3298   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3299   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3300   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3301   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3302   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3303   *newmat = mat;
3304   PetscFunctionReturn(0);
3305 }
3306 
3307 
3308 
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatLoad_MPIAIJ"
3311 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3312 {
3313   PetscScalar    *vals,*svals;
3314   MPI_Comm       comm;
3315   PetscErrorCode ierr;
3316   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3317   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3318   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3319   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3320   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3321   int            fd;
3322   PetscInt       bs = 1;
3323 
3324   PetscFunctionBegin;
3325   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3326   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3327   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3328   if (!rank) {
3329     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3330     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3331     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3332   }
3333 
3334   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3335   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3336   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3337 
3338   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3339 
3340   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3341   M    = header[1]; N = header[2];
3342   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3343   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3344   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3345 
3346   /* If global sizes are set, check if they are consistent with that given in the file */
3347   if (sizesset) {
3348     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3349   }
3350   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3351   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3352 
3353   /* determine ownership of all (block) rows */
3354   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3355   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3356   else m = newMat->rmap->n; /* Set by user */
3357 
3358   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3359   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3360 
3361   /* First process needs enough room for process with most rows */
3362   if (!rank) {
3363     mmax = rowners[1];
3364     for (i=2; i<=size; i++) {
3365       mmax = PetscMax(mmax, rowners[i]);
3366     }
3367   } else mmax = -1;             /* unused, but compilers complain */
3368 
3369   rowners[0] = 0;
3370   for (i=2; i<=size; i++) {
3371     rowners[i] += rowners[i-1];
3372   }
3373   rstart = rowners[rank];
3374   rend   = rowners[rank+1];
3375 
3376   /* distribute row lengths to all processors */
3377   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3378   if (!rank) {
3379     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3380     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3381     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3382     for (j=0; j<m; j++) {
3383       procsnz[0] += ourlens[j];
3384     }
3385     for (i=1; i<size; i++) {
3386       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3387       /* calculate the number of nonzeros on each processor */
3388       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3389         procsnz[i] += rowlengths[j];
3390       }
3391       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3392     }
3393     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3394   } else {
3395     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3396   }
3397 
3398   if (!rank) {
3399     /* determine max buffer needed and allocate it */
3400     maxnz = 0;
3401     for (i=0; i<size; i++) {
3402       maxnz = PetscMax(maxnz,procsnz[i]);
3403     }
3404     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3405 
3406     /* read in my part of the matrix column indices  */
3407     nz   = procsnz[0];
3408     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3409     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3410 
3411     /* read in every one elses and ship off */
3412     for (i=1; i<size; i++) {
3413       nz   = procsnz[i];
3414       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3415       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3416     }
3417     ierr = PetscFree(cols);CHKERRQ(ierr);
3418   } else {
3419     /* determine buffer space needed for message */
3420     nz = 0;
3421     for (i=0; i<m; i++) {
3422       nz += ourlens[i];
3423     }
3424     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3425 
3426     /* receive message of column indices*/
3427     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3428   }
3429 
3430   /* determine column ownership if matrix is not square */
3431   if (N != M) {
3432     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3433     else n = newMat->cmap->n;
3434     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3435     cstart = cend - n;
3436   } else {
3437     cstart = rstart;
3438     cend   = rend;
3439     n      = cend - cstart;
3440   }
3441 
3442   /* loop over local rows, determining number of off diagonal entries */
3443   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3444   jj   = 0;
3445   for (i=0; i<m; i++) {
3446     for (j=0; j<ourlens[i]; j++) {
3447       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3448       jj++;
3449     }
3450   }
3451 
3452   for (i=0; i<m; i++) {
3453     ourlens[i] -= offlens[i];
3454   }
3455   if (!sizesset) {
3456     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3457   }
3458 
3459   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3460 
3461   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3462 
3463   for (i=0; i<m; i++) {
3464     ourlens[i] += offlens[i];
3465   }
3466 
3467   if (!rank) {
3468     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3469 
3470     /* read in my part of the matrix numerical values  */
3471     nz   = procsnz[0];
3472     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3473 
3474     /* insert into matrix */
3475     jj      = rstart;
3476     smycols = mycols;
3477     svals   = vals;
3478     for (i=0; i<m; i++) {
3479       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3480       smycols += ourlens[i];
3481       svals   += ourlens[i];
3482       jj++;
3483     }
3484 
3485     /* read in other processors and ship out */
3486     for (i=1; i<size; i++) {
3487       nz   = procsnz[i];
3488       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3489       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3490     }
3491     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3492   } else {
3493     /* receive numeric values */
3494     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3495 
3496     /* receive message of values*/
3497     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3498 
3499     /* insert into matrix */
3500     jj      = rstart;
3501     smycols = mycols;
3502     svals   = vals;
3503     for (i=0; i<m; i++) {
3504       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3505       smycols += ourlens[i];
3506       svals   += ourlens[i];
3507       jj++;
3508     }
3509   }
3510   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3511   ierr = PetscFree(vals);CHKERRQ(ierr);
3512   ierr = PetscFree(mycols);CHKERRQ(ierr);
3513   ierr = PetscFree(rowners);CHKERRQ(ierr);
3514   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3515   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 #undef __FUNCT__
3520 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3521 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3522 {
3523   PetscErrorCode ierr;
3524   IS             iscol_local;
3525   PetscInt       csize;
3526 
3527   PetscFunctionBegin;
3528   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3529   if (call == MAT_REUSE_MATRIX) {
3530     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3531     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3532   } else {
3533     PetscInt cbs;
3534     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3535     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3536     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3537   }
3538   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3539   if (call == MAT_INITIAL_MATRIX) {
3540     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3541     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3542   }
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3547 #undef __FUNCT__
3548 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3549 /*
3550     Not great since it makes two copies of the submatrix, first an SeqAIJ
3551   in local and then by concatenating the local matrices the end result.
3552   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3553 
3554   Note: This requires a sequential iscol with all indices.
3555 */
3556 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3557 {
3558   PetscErrorCode ierr;
3559   PetscMPIInt    rank,size;
3560   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3561   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3562   PetscBool      allcolumns, colflag;
3563   Mat            M,Mreuse;
3564   MatScalar      *vwork,*aa;
3565   MPI_Comm       comm;
3566   Mat_SeqAIJ     *aij;
3567 
3568   PetscFunctionBegin;
3569   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3570   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3571   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3572 
3573   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3574   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3575   if (colflag && ncol == mat->cmap->N) {
3576     allcolumns = PETSC_TRUE;
3577   } else {
3578     allcolumns = PETSC_FALSE;
3579   }
3580   if (call ==  MAT_REUSE_MATRIX) {
3581     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3582     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3583     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3584   } else {
3585     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3586   }
3587 
3588   /*
3589       m - number of local rows
3590       n - number of columns (same on all processors)
3591       rstart - first row in new global matrix generated
3592   */
3593   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3594   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3595   if (call == MAT_INITIAL_MATRIX) {
3596     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3597     ii  = aij->i;
3598     jj  = aij->j;
3599 
3600     /*
3601         Determine the number of non-zeros in the diagonal and off-diagonal
3602         portions of the matrix in order to do correct preallocation
3603     */
3604 
3605     /* first get start and end of "diagonal" columns */
3606     if (csize == PETSC_DECIDE) {
3607       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3608       if (mglobal == n) { /* square matrix */
3609         nlocal = m;
3610       } else {
3611         nlocal = n/size + ((n % size) > rank);
3612       }
3613     } else {
3614       nlocal = csize;
3615     }
3616     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3617     rstart = rend - nlocal;
3618     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3619 
3620     /* next, compute all the lengths */
3621     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3622     olens = dlens + m;
3623     for (i=0; i<m; i++) {
3624       jend = ii[i+1] - ii[i];
3625       olen = 0;
3626       dlen = 0;
3627       for (j=0; j<jend; j++) {
3628         if (*jj < rstart || *jj >= rend) olen++;
3629         else dlen++;
3630         jj++;
3631       }
3632       olens[i] = olen;
3633       dlens[i] = dlen;
3634     }
3635     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3636     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3637     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3638     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3639     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3640     ierr = PetscFree(dlens);CHKERRQ(ierr);
3641   } else {
3642     PetscInt ml,nl;
3643 
3644     M    = *newmat;
3645     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3646     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3647     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3648     /*
3649          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3650        rather than the slower MatSetValues().
3651     */
3652     M->was_assembled = PETSC_TRUE;
3653     M->assembled     = PETSC_FALSE;
3654   }
3655   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3656   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3657   ii   = aij->i;
3658   jj   = aij->j;
3659   aa   = aij->a;
3660   for (i=0; i<m; i++) {
3661     row   = rstart + i;
3662     nz    = ii[i+1] - ii[i];
3663     cwork = jj;     jj += nz;
3664     vwork = aa;     aa += nz;
3665     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3666   }
3667 
3668   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3669   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3670   *newmat = M;
3671 
3672   /* save submatrix used in processor for next request */
3673   if (call ==  MAT_INITIAL_MATRIX) {
3674     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3675     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3676   }
3677   PetscFunctionReturn(0);
3678 }
3679 
3680 #undef __FUNCT__
3681 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3682 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3683 {
3684   PetscInt       m,cstart, cend,j,nnz,i,d;
3685   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3686   const PetscInt *JJ;
3687   PetscScalar    *values;
3688   PetscErrorCode ierr;
3689 
3690   PetscFunctionBegin;
3691   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3692 
3693   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3694   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3695   m      = B->rmap->n;
3696   cstart = B->cmap->rstart;
3697   cend   = B->cmap->rend;
3698   rstart = B->rmap->rstart;
3699 
3700   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3701 
3702 #if defined(PETSC_USE_DEBUGGING)
3703   for (i=0; i<m; i++) {
3704     nnz = Ii[i+1]- Ii[i];
3705     JJ  = J + Ii[i];
3706     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3707     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3708     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3709   }
3710 #endif
3711 
3712   for (i=0; i<m; i++) {
3713     nnz     = Ii[i+1]- Ii[i];
3714     JJ      = J + Ii[i];
3715     nnz_max = PetscMax(nnz_max,nnz);
3716     d       = 0;
3717     for (j=0; j<nnz; j++) {
3718       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3719     }
3720     d_nnz[i] = d;
3721     o_nnz[i] = nnz - d;
3722   }
3723   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3724   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3725 
3726   if (v) values = (PetscScalar*)v;
3727   else {
3728     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3729   }
3730 
3731   for (i=0; i<m; i++) {
3732     ii   = i + rstart;
3733     nnz  = Ii[i+1]- Ii[i];
3734     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3735   }
3736   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3737   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3738 
3739   if (!v) {
3740     ierr = PetscFree(values);CHKERRQ(ierr);
3741   }
3742   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3743   PetscFunctionReturn(0);
3744 }
3745 
3746 #undef __FUNCT__
3747 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3748 /*@
3749    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3750    (the default parallel PETSc format).
3751 
3752    Collective on MPI_Comm
3753 
3754    Input Parameters:
3755 +  B - the matrix
3756 .  i - the indices into j for the start of each local row (starts with zero)
3757 .  j - the column indices for each local row (starts with zero)
3758 -  v - optional values in the matrix
3759 
3760    Level: developer
3761 
3762    Notes:
3763        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3764      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3765      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3766 
3767        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3768 
3769        The format which is used for the sparse matrix input, is equivalent to a
3770     row-major ordering.. i.e for the following matrix, the input data expected is
3771     as shown:
3772 
3773         1 0 0
3774         2 0 3     P0
3775        -------
3776         4 5 6     P1
3777 
3778      Process0 [P0]: rows_owned=[0,1]
3779         i =  {0,1,3}  [size = nrow+1  = 2+1]
3780         j =  {0,0,2}  [size = nz = 6]
3781         v =  {1,2,3}  [size = nz = 6]
3782 
3783      Process1 [P1]: rows_owned=[2]
3784         i =  {0,3}    [size = nrow+1  = 1+1]
3785         j =  {0,1,2}  [size = nz = 6]
3786         v =  {4,5,6}  [size = nz = 6]
3787 
3788 .keywords: matrix, aij, compressed row, sparse, parallel
3789 
3790 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3791           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3792 @*/
3793 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3794 {
3795   PetscErrorCode ierr;
3796 
3797   PetscFunctionBegin;
3798   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 #undef __FUNCT__
3803 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3804 /*@C
3805    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3806    (the default parallel PETSc format).  For good matrix assembly performance
3807    the user should preallocate the matrix storage by setting the parameters
3808    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3809    performance can be increased by more than a factor of 50.
3810 
3811    Collective on MPI_Comm
3812 
3813    Input Parameters:
3814 +  B - the matrix
3815 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3816            (same value is used for all local rows)
3817 .  d_nnz - array containing the number of nonzeros in the various rows of the
3818            DIAGONAL portion of the local submatrix (possibly different for each row)
3819            or NULL, if d_nz is used to specify the nonzero structure.
3820            The size of this array is equal to the number of local rows, i.e 'm'.
3821            For matrices that will be factored, you must leave room for (and set)
3822            the diagonal entry even if it is zero.
3823 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3824            submatrix (same value is used for all local rows).
3825 -  o_nnz - array containing the number of nonzeros in the various rows of the
3826            OFF-DIAGONAL portion of the local submatrix (possibly different for
3827            each row) or NULL, if o_nz is used to specify the nonzero
3828            structure. The size of this array is equal to the number
3829            of local rows, i.e 'm'.
3830 
3831    If the *_nnz parameter is given then the *_nz parameter is ignored
3832 
3833    The AIJ format (also called the Yale sparse matrix format or
3834    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3835    storage.  The stored row and column indices begin with zero.
3836    See Users-Manual: ch_mat for details.
3837 
3838    The parallel matrix is partitioned such that the first m0 rows belong to
3839    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3840    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3841 
3842    The DIAGONAL portion of the local submatrix of a processor can be defined
3843    as the submatrix which is obtained by extraction the part corresponding to
3844    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3845    first row that belongs to the processor, r2 is the last row belonging to
3846    the this processor, and c1-c2 is range of indices of the local part of a
3847    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3848    common case of a square matrix, the row and column ranges are the same and
3849    the DIAGONAL part is also square. The remaining portion of the local
3850    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3851 
3852    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3853 
3854    You can call MatGetInfo() to get information on how effective the preallocation was;
3855    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3856    You can also run with the option -info and look for messages with the string
3857    malloc in them to see if additional memory allocation was needed.
3858 
3859    Example usage:
3860 
3861    Consider the following 8x8 matrix with 34 non-zero values, that is
3862    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3863    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3864    as follows:
3865 
3866 .vb
3867             1  2  0  |  0  3  0  |  0  4
3868     Proc0   0  5  6  |  7  0  0  |  8  0
3869             9  0 10  | 11  0  0  | 12  0
3870     -------------------------------------
3871            13  0 14  | 15 16 17  |  0  0
3872     Proc1   0 18  0  | 19 20 21  |  0  0
3873             0  0  0  | 22 23  0  | 24  0
3874     -------------------------------------
3875     Proc2  25 26 27  |  0  0 28  | 29  0
3876            30  0  0  | 31 32 33  |  0 34
3877 .ve
3878 
3879    This can be represented as a collection of submatrices as:
3880 
3881 .vb
3882       A B C
3883       D E F
3884       G H I
3885 .ve
3886 
3887    Where the submatrices A,B,C are owned by proc0, D,E,F are
3888    owned by proc1, G,H,I are owned by proc2.
3889 
3890    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3891    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3892    The 'M','N' parameters are 8,8, and have the same values on all procs.
3893 
3894    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3895    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3896    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3897    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3898    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3899    matrix, ans [DF] as another SeqAIJ matrix.
3900 
3901    When d_nz, o_nz parameters are specified, d_nz storage elements are
3902    allocated for every row of the local diagonal submatrix, and o_nz
3903    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3904    One way to choose d_nz and o_nz is to use the max nonzerors per local
3905    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3906    In this case, the values of d_nz,o_nz are:
3907 .vb
3908      proc0 : dnz = 2, o_nz = 2
3909      proc1 : dnz = 3, o_nz = 2
3910      proc2 : dnz = 1, o_nz = 4
3911 .ve
3912    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3913    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3914    for proc3. i.e we are using 12+15+10=37 storage locations to store
3915    34 values.
3916 
3917    When d_nnz, o_nnz parameters are specified, the storage is specified
3918    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3919    In the above case the values for d_nnz,o_nnz are:
3920 .vb
3921      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3922      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3923      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3924 .ve
3925    Here the space allocated is sum of all the above values i.e 34, and
3926    hence pre-allocation is perfect.
3927 
3928    Level: intermediate
3929 
3930 .keywords: matrix, aij, compressed row, sparse, parallel
3931 
3932 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3933           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3934 @*/
3935 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3936 {
3937   PetscErrorCode ierr;
3938 
3939   PetscFunctionBegin;
3940   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3941   PetscValidType(B,1);
3942   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3943   PetscFunctionReturn(0);
3944 }
3945 
3946 #undef __FUNCT__
3947 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3948 /*@
3949      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3950          CSR format the local rows.
3951 
3952    Collective on MPI_Comm
3953 
3954    Input Parameters:
3955 +  comm - MPI communicator
3956 .  m - number of local rows (Cannot be PETSC_DECIDE)
3957 .  n - This value should be the same as the local size used in creating the
3958        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3959        calculated if N is given) For square matrices n is almost always m.
3960 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3961 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3962 .   i - row indices
3963 .   j - column indices
3964 -   a - matrix values
3965 
3966    Output Parameter:
3967 .   mat - the matrix
3968 
3969    Level: intermediate
3970 
3971    Notes:
3972        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3973      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3974      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3975 
3976        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3977 
3978        The format which is used for the sparse matrix input, is equivalent to a
3979     row-major ordering.. i.e for the following matrix, the input data expected is
3980     as shown:
3981 
3982         1 0 0
3983         2 0 3     P0
3984        -------
3985         4 5 6     P1
3986 
3987      Process0 [P0]: rows_owned=[0,1]
3988         i =  {0,1,3}  [size = nrow+1  = 2+1]
3989         j =  {0,0,2}  [size = nz = 6]
3990         v =  {1,2,3}  [size = nz = 6]
3991 
3992      Process1 [P1]: rows_owned=[2]
3993         i =  {0,3}    [size = nrow+1  = 1+1]
3994         j =  {0,1,2}  [size = nz = 6]
3995         v =  {4,5,6}  [size = nz = 6]
3996 
3997 .keywords: matrix, aij, compressed row, sparse, parallel
3998 
3999 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4000           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4001 @*/
4002 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4003 {
4004   PetscErrorCode ierr;
4005 
4006   PetscFunctionBegin;
4007   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4008   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4009   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4010   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4011   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4012   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4013   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4014   PetscFunctionReturn(0);
4015 }
4016 
4017 #undef __FUNCT__
4018 #define __FUNCT__ "MatCreateAIJ"
4019 /*@C
4020    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4021    (the default parallel PETSc format).  For good matrix assembly performance
4022    the user should preallocate the matrix storage by setting the parameters
4023    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4024    performance can be increased by more than a factor of 50.
4025 
4026    Collective on MPI_Comm
4027 
4028    Input Parameters:
4029 +  comm - MPI communicator
4030 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4031            This value should be the same as the local size used in creating the
4032            y vector for the matrix-vector product y = Ax.
4033 .  n - This value should be the same as the local size used in creating the
4034        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4035        calculated if N is given) For square matrices n is almost always m.
4036 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4037 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4038 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4039            (same value is used for all local rows)
4040 .  d_nnz - array containing the number of nonzeros in the various rows of the
4041            DIAGONAL portion of the local submatrix (possibly different for each row)
4042            or NULL, if d_nz is used to specify the nonzero structure.
4043            The size of this array is equal to the number of local rows, i.e 'm'.
4044 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4045            submatrix (same value is used for all local rows).
4046 -  o_nnz - array containing the number of nonzeros in the various rows of the
4047            OFF-DIAGONAL portion of the local submatrix (possibly different for
4048            each row) or NULL, if o_nz is used to specify the nonzero
4049            structure. The size of this array is equal to the number
4050            of local rows, i.e 'm'.
4051 
4052    Output Parameter:
4053 .  A - the matrix
4054 
4055    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4056    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4057    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4058 
4059    Notes:
4060    If the *_nnz parameter is given then the *_nz parameter is ignored
4061 
4062    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4063    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4064    storage requirements for this matrix.
4065 
4066    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4067    processor than it must be used on all processors that share the object for
4068    that argument.
4069 
4070    The user MUST specify either the local or global matrix dimensions
4071    (possibly both).
4072 
4073    The parallel matrix is partitioned across processors such that the
4074    first m0 rows belong to process 0, the next m1 rows belong to
4075    process 1, the next m2 rows belong to process 2 etc.. where
4076    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4077    values corresponding to [m x N] submatrix.
4078 
4079    The columns are logically partitioned with the n0 columns belonging
4080    to 0th partition, the next n1 columns belonging to the next
4081    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4082 
4083    The DIAGONAL portion of the local submatrix on any given processor
4084    is the submatrix corresponding to the rows and columns m,n
4085    corresponding to the given processor. i.e diagonal matrix on
4086    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4087    etc. The remaining portion of the local submatrix [m x (N-n)]
4088    constitute the OFF-DIAGONAL portion. The example below better
4089    illustrates this concept.
4090 
4091    For a square global matrix we define each processor's diagonal portion
4092    to be its local rows and the corresponding columns (a square submatrix);
4093    each processor's off-diagonal portion encompasses the remainder of the
4094    local matrix (a rectangular submatrix).
4095 
4096    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4097 
4098    When calling this routine with a single process communicator, a matrix of
4099    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4100    type of communicator, use the construction mechanism:
4101      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4102 
4103    By default, this format uses inodes (identical nodes) when possible.
4104    We search for consecutive rows with the same nonzero structure, thereby
4105    reusing matrix information to achieve increased efficiency.
4106 
4107    Options Database Keys:
4108 +  -mat_no_inode  - Do not use inodes
4109 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4110 -  -mat_aij_oneindex - Internally use indexing starting at 1
4111         rather than 0.  Note that when calling MatSetValues(),
4112         the user still MUST index entries starting at 0!
4113 
4114 
4115    Example usage:
4116 
4117    Consider the following 8x8 matrix with 34 non-zero values, that is
4118    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4119    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4120    as follows:
4121 
4122 .vb
4123             1  2  0  |  0  3  0  |  0  4
4124     Proc0   0  5  6  |  7  0  0  |  8  0
4125             9  0 10  | 11  0  0  | 12  0
4126     -------------------------------------
4127            13  0 14  | 15 16 17  |  0  0
4128     Proc1   0 18  0  | 19 20 21  |  0  0
4129             0  0  0  | 22 23  0  | 24  0
4130     -------------------------------------
4131     Proc2  25 26 27  |  0  0 28  | 29  0
4132            30  0  0  | 31 32 33  |  0 34
4133 .ve
4134 
4135    This can be represented as a collection of submatrices as:
4136 
4137 .vb
4138       A B C
4139       D E F
4140       G H I
4141 .ve
4142 
4143    Where the submatrices A,B,C are owned by proc0, D,E,F are
4144    owned by proc1, G,H,I are owned by proc2.
4145 
4146    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4147    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4148    The 'M','N' parameters are 8,8, and have the same values on all procs.
4149 
4150    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4151    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4152    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4153    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4154    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4155    matrix, ans [DF] as another SeqAIJ matrix.
4156 
4157    When d_nz, o_nz parameters are specified, d_nz storage elements are
4158    allocated for every row of the local diagonal submatrix, and o_nz
4159    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4160    One way to choose d_nz and o_nz is to use the max nonzerors per local
4161    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4162    In this case, the values of d_nz,o_nz are:
4163 .vb
4164      proc0 : dnz = 2, o_nz = 2
4165      proc1 : dnz = 3, o_nz = 2
4166      proc2 : dnz = 1, o_nz = 4
4167 .ve
4168    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4169    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4170    for proc3. i.e we are using 12+15+10=37 storage locations to store
4171    34 values.
4172 
4173    When d_nnz, o_nnz parameters are specified, the storage is specified
4174    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4175    In the above case the values for d_nnz,o_nnz are:
4176 .vb
4177      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4178      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4179      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4180 .ve
4181    Here the space allocated is sum of all the above values i.e 34, and
4182    hence pre-allocation is perfect.
4183 
4184    Level: intermediate
4185 
4186 .keywords: matrix, aij, compressed row, sparse, parallel
4187 
4188 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4189           MPIAIJ, MatCreateMPIAIJWithArrays()
4190 @*/
4191 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4192 {
4193   PetscErrorCode ierr;
4194   PetscMPIInt    size;
4195 
4196   PetscFunctionBegin;
4197   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4198   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4200   if (size > 1) {
4201     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4202     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4203   } else {
4204     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4205     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4206   }
4207   PetscFunctionReturn(0);
4208 }
4209 
4210 #undef __FUNCT__
4211 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4212 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4213 {
4214   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4215 
4216   PetscFunctionBegin;
4217   if (Ad)     *Ad     = a->A;
4218   if (Ao)     *Ao     = a->B;
4219   if (colmap) *colmap = a->garray;
4220   PetscFunctionReturn(0);
4221 }
4222 
4223 #undef __FUNCT__
4224 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4225 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4226 {
4227   PetscErrorCode ierr;
4228   PetscInt       i;
4229   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4230 
4231   PetscFunctionBegin;
4232   if (coloring->ctype == IS_COLORING_GLOBAL) {
4233     ISColoringValue *allcolors,*colors;
4234     ISColoring      ocoloring;
4235 
4236     /* set coloring for diagonal portion */
4237     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4238 
4239     /* set coloring for off-diagonal portion */
4240     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4241     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4242     for (i=0; i<a->B->cmap->n; i++) {
4243       colors[i] = allcolors[a->garray[i]];
4244     }
4245     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4246     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4247     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4248     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4249   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4250     ISColoringValue *colors;
4251     PetscInt        *larray;
4252     ISColoring      ocoloring;
4253 
4254     /* set coloring for diagonal portion */
4255     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4256     for (i=0; i<a->A->cmap->n; i++) {
4257       larray[i] = i + A->cmap->rstart;
4258     }
4259     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4260     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4261     for (i=0; i<a->A->cmap->n; i++) {
4262       colors[i] = coloring->colors[larray[i]];
4263     }
4264     ierr = PetscFree(larray);CHKERRQ(ierr);
4265     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4266     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4267     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4268 
4269     /* set coloring for off-diagonal portion */
4270     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4271     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4272     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4273     for (i=0; i<a->B->cmap->n; i++) {
4274       colors[i] = coloring->colors[larray[i]];
4275     }
4276     ierr = PetscFree(larray);CHKERRQ(ierr);
4277     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4278     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4279     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4280   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4281   PetscFunctionReturn(0);
4282 }
4283 
4284 #undef __FUNCT__
4285 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4286 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4287 {
4288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4289   PetscErrorCode ierr;
4290 
4291   PetscFunctionBegin;
4292   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4293   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4294   PetscFunctionReturn(0);
4295 }
4296 
4297 #undef __FUNCT__
4298 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4299 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4300 {
4301   PetscErrorCode ierr;
4302   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4303   PetscInt       *indx;
4304 
4305   PetscFunctionBegin;
4306   /* This routine will ONLY return MPIAIJ type matrix */
4307   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4308   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4309   if (n == PETSC_DECIDE) {
4310     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4311   }
4312   /* Check sum(n) = N */
4313   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4314   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4315 
4316   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4317   rstart -= m;
4318 
4319   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4320   for (i=0; i<m; i++) {
4321     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4322     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4323     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4324   }
4325 
4326   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4327   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4328   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4329   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4330   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4331   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4332   PetscFunctionReturn(0);
4333 }
4334 
4335 #undef __FUNCT__
4336 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4337 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4338 {
4339   PetscErrorCode ierr;
4340   PetscInt       m,N,i,rstart,nnz,Ii;
4341   PetscInt       *indx;
4342   PetscScalar    *values;
4343 
4344   PetscFunctionBegin;
4345   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4346   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4347   for (i=0; i<m; i++) {
4348     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4349     Ii   = i + rstart;
4350     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4351     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4352   }
4353   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4354   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4360 /*@
4361       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4362                  matrices from each processor
4363 
4364     Collective on MPI_Comm
4365 
4366    Input Parameters:
4367 +    comm - the communicators the parallel matrix will live on
4368 .    inmat - the input sequential matrices
4369 .    n - number of local columns (or PETSC_DECIDE)
4370 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4371 
4372    Output Parameter:
4373 .    outmat - the parallel matrix generated
4374 
4375     Level: advanced
4376 
4377    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4378 
4379 @*/
4380 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4381 {
4382   PetscErrorCode ierr;
4383   PetscMPIInt    size;
4384 
4385   PetscFunctionBegin;
4386   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4387   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4388   if (size == 1) {
4389     if (scall == MAT_INITIAL_MATRIX) {
4390       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4391     } else {
4392       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4393     }
4394   } else {
4395     if (scall == MAT_INITIAL_MATRIX) {
4396       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4397     }
4398     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4399   }
4400   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4401   PetscFunctionReturn(0);
4402 }
4403 
4404 #undef __FUNCT__
4405 #define __FUNCT__ "MatFileSplit"
4406 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4407 {
4408   PetscErrorCode    ierr;
4409   PetscMPIInt       rank;
4410   PetscInt          m,N,i,rstart,nnz;
4411   size_t            len;
4412   const PetscInt    *indx;
4413   PetscViewer       out;
4414   char              *name;
4415   Mat               B;
4416   const PetscScalar *values;
4417 
4418   PetscFunctionBegin;
4419   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4420   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4421   /* Should this be the type of the diagonal block of A? */
4422   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4423   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4424   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4425   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4426   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4427   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4428   for (i=0; i<m; i++) {
4429     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4430     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4431     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4432   }
4433   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4434   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4435 
4436   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4437   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4438   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4439   sprintf(name,"%s.%d",outfile,rank);
4440   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4441   ierr = PetscFree(name);CHKERRQ(ierr);
4442   ierr = MatView(B,out);CHKERRQ(ierr);
4443   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4444   ierr = MatDestroy(&B);CHKERRQ(ierr);
4445   PetscFunctionReturn(0);
4446 }
4447 
4448 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4449 #undef __FUNCT__
4450 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4451 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4452 {
4453   PetscErrorCode      ierr;
4454   Mat_Merge_SeqsToMPI *merge;
4455   PetscContainer      container;
4456 
4457   PetscFunctionBegin;
4458   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4459   if (container) {
4460     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4461     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4462     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4473     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4474     ierr = PetscFree(merge);CHKERRQ(ierr);
4475     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4476   }
4477   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4478   PetscFunctionReturn(0);
4479 }
4480 
4481 #include <../src/mat/utils/freespace.h>
4482 #include <petscbt.h>
4483 
4484 #undef __FUNCT__
4485 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4486 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4487 {
4488   PetscErrorCode      ierr;
4489   MPI_Comm            comm;
4490   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4491   PetscMPIInt         size,rank,taga,*len_s;
4492   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4493   PetscInt            proc,m;
4494   PetscInt            **buf_ri,**buf_rj;
4495   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4496   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4497   MPI_Request         *s_waits,*r_waits;
4498   MPI_Status          *status;
4499   MatScalar           *aa=a->a;
4500   MatScalar           **abuf_r,*ba_i;
4501   Mat_Merge_SeqsToMPI *merge;
4502   PetscContainer      container;
4503 
4504   PetscFunctionBegin;
4505   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4506   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4507 
4508   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4509   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4510 
4511   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4512   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4513 
4514   bi     = merge->bi;
4515   bj     = merge->bj;
4516   buf_ri = merge->buf_ri;
4517   buf_rj = merge->buf_rj;
4518 
4519   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4520   owners = merge->rowmap->range;
4521   len_s  = merge->len_s;
4522 
4523   /* send and recv matrix values */
4524   /*-----------------------------*/
4525   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4526   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4527 
4528   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4529   for (proc=0,k=0; proc<size; proc++) {
4530     if (!len_s[proc]) continue;
4531     i    = owners[proc];
4532     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4533     k++;
4534   }
4535 
4536   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4537   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4538   ierr = PetscFree(status);CHKERRQ(ierr);
4539 
4540   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4541   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4542 
4543   /* insert mat values of mpimat */
4544   /*----------------------------*/
4545   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4546   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4547 
4548   for (k=0; k<merge->nrecv; k++) {
4549     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4550     nrows       = *(buf_ri_k[k]);
4551     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4552     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4553   }
4554 
4555   /* set values of ba */
4556   m = merge->rowmap->n;
4557   for (i=0; i<m; i++) {
4558     arow = owners[rank] + i;
4559     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4560     bnzi = bi[i+1] - bi[i];
4561     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4562 
4563     /* add local non-zero vals of this proc's seqmat into ba */
4564     anzi   = ai[arow+1] - ai[arow];
4565     aj     = a->j + ai[arow];
4566     aa     = a->a + ai[arow];
4567     nextaj = 0;
4568     for (j=0; nextaj<anzi; j++) {
4569       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4570         ba_i[j] += aa[nextaj++];
4571       }
4572     }
4573 
4574     /* add received vals into ba */
4575     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4576       /* i-th row */
4577       if (i == *nextrow[k]) {
4578         anzi   = *(nextai[k]+1) - *nextai[k];
4579         aj     = buf_rj[k] + *(nextai[k]);
4580         aa     = abuf_r[k] + *(nextai[k]);
4581         nextaj = 0;
4582         for (j=0; nextaj<anzi; j++) {
4583           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4584             ba_i[j] += aa[nextaj++];
4585           }
4586         }
4587         nextrow[k]++; nextai[k]++;
4588       }
4589     }
4590     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4591   }
4592   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4594 
4595   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4596   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4597   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4598   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4599   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4600   PetscFunctionReturn(0);
4601 }
4602 
4603 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4604 
4605 #undef __FUNCT__
4606 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4607 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4608 {
4609   PetscErrorCode      ierr;
4610   Mat                 B_mpi;
4611   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4612   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4613   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4614   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4615   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4616   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4617   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4618   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4619   MPI_Status          *status;
4620   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4621   PetscBT             lnkbt;
4622   Mat_Merge_SeqsToMPI *merge;
4623   PetscContainer      container;
4624 
4625   PetscFunctionBegin;
4626   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4627 
4628   /* make sure it is a PETSc comm */
4629   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4631   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4632 
4633   ierr = PetscNew(&merge);CHKERRQ(ierr);
4634   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4635 
4636   /* determine row ownership */
4637   /*---------------------------------------------------------*/
4638   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4639   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4640   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4641   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4642   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4643   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4644   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4645 
4646   m      = merge->rowmap->n;
4647   owners = merge->rowmap->range;
4648 
4649   /* determine the number of messages to send, their lengths */
4650   /*---------------------------------------------------------*/
4651   len_s = merge->len_s;
4652 
4653   len          = 0; /* length of buf_si[] */
4654   merge->nsend = 0;
4655   for (proc=0; proc<size; proc++) {
4656     len_si[proc] = 0;
4657     if (proc == rank) {
4658       len_s[proc] = 0;
4659     } else {
4660       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4661       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4662     }
4663     if (len_s[proc]) {
4664       merge->nsend++;
4665       nrows = 0;
4666       for (i=owners[proc]; i<owners[proc+1]; i++) {
4667         if (ai[i+1] > ai[i]) nrows++;
4668       }
4669       len_si[proc] = 2*(nrows+1);
4670       len         += len_si[proc];
4671     }
4672   }
4673 
4674   /* determine the number and length of messages to receive for ij-structure */
4675   /*-------------------------------------------------------------------------*/
4676   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4677   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4678 
4679   /* post the Irecv of j-structure */
4680   /*-------------------------------*/
4681   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4682   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4683 
4684   /* post the Isend of j-structure */
4685   /*--------------------------------*/
4686   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4687 
4688   for (proc=0, k=0; proc<size; proc++) {
4689     if (!len_s[proc]) continue;
4690     i    = owners[proc];
4691     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4692     k++;
4693   }
4694 
4695   /* receives and sends of j-structure are complete */
4696   /*------------------------------------------------*/
4697   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4698   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4699 
4700   /* send and recv i-structure */
4701   /*---------------------------*/
4702   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4703   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4704 
4705   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4706   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4707   for (proc=0,k=0; proc<size; proc++) {
4708     if (!len_s[proc]) continue;
4709     /* form outgoing message for i-structure:
4710          buf_si[0]:                 nrows to be sent
4711                [1:nrows]:           row index (global)
4712                [nrows+1:2*nrows+1]: i-structure index
4713     */
4714     /*-------------------------------------------*/
4715     nrows       = len_si[proc]/2 - 1;
4716     buf_si_i    = buf_si + nrows+1;
4717     buf_si[0]   = nrows;
4718     buf_si_i[0] = 0;
4719     nrows       = 0;
4720     for (i=owners[proc]; i<owners[proc+1]; i++) {
4721       anzi = ai[i+1] - ai[i];
4722       if (anzi) {
4723         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4724         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4725         nrows++;
4726       }
4727     }
4728     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4729     k++;
4730     buf_si += len_si[proc];
4731   }
4732 
4733   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4734   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4735 
4736   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4737   for (i=0; i<merge->nrecv; i++) {
4738     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4739   }
4740 
4741   ierr = PetscFree(len_si);CHKERRQ(ierr);
4742   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4743   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4744   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4745   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4746   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4747   ierr = PetscFree(status);CHKERRQ(ierr);
4748 
4749   /* compute a local seq matrix in each processor */
4750   /*----------------------------------------------*/
4751   /* allocate bi array and free space for accumulating nonzero column info */
4752   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4753   bi[0] = 0;
4754 
4755   /* create and initialize a linked list */
4756   nlnk = N+1;
4757   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4758 
4759   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4760   len  = ai[owners[rank+1]] - ai[owners[rank]];
4761   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4762 
4763   current_space = free_space;
4764 
4765   /* determine symbolic info for each local row */
4766   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4767 
4768   for (k=0; k<merge->nrecv; k++) {
4769     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4770     nrows       = *buf_ri_k[k];
4771     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4772     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4773   }
4774 
4775   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4776   len  = 0;
4777   for (i=0; i<m; i++) {
4778     bnzi = 0;
4779     /* add local non-zero cols of this proc's seqmat into lnk */
4780     arow  = owners[rank] + i;
4781     anzi  = ai[arow+1] - ai[arow];
4782     aj    = a->j + ai[arow];
4783     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4784     bnzi += nlnk;
4785     /* add received col data into lnk */
4786     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4787       if (i == *nextrow[k]) { /* i-th row */
4788         anzi  = *(nextai[k]+1) - *nextai[k];
4789         aj    = buf_rj[k] + *nextai[k];
4790         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4791         bnzi += nlnk;
4792         nextrow[k]++; nextai[k]++;
4793       }
4794     }
4795     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4796 
4797     /* if free space is not available, make more free space */
4798     if (current_space->local_remaining<bnzi) {
4799       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4800       nspacedouble++;
4801     }
4802     /* copy data into free space, then initialize lnk */
4803     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4804     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4805 
4806     current_space->array           += bnzi;
4807     current_space->local_used      += bnzi;
4808     current_space->local_remaining -= bnzi;
4809 
4810     bi[i+1] = bi[i] + bnzi;
4811   }
4812 
4813   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4814 
4815   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4816   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4817   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4818 
4819   /* create symbolic parallel matrix B_mpi */
4820   /*---------------------------------------*/
4821   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4822   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4823   if (n==PETSC_DECIDE) {
4824     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4825   } else {
4826     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4827   }
4828   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4829   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4830   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4831   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4832   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4833 
4834   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4835   B_mpi->assembled    = PETSC_FALSE;
4836   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4837   merge->bi           = bi;
4838   merge->bj           = bj;
4839   merge->buf_ri       = buf_ri;
4840   merge->buf_rj       = buf_rj;
4841   merge->coi          = NULL;
4842   merge->coj          = NULL;
4843   merge->owners_co    = NULL;
4844 
4845   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4846 
4847   /* attach the supporting struct to B_mpi for reuse */
4848   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4849   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4850   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4851   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4852   *mpimat = B_mpi;
4853 
4854   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4855   PetscFunctionReturn(0);
4856 }
4857 
4858 #undef __FUNCT__
4859 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4860 /*@C
4861       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4862                  matrices from each processor
4863 
4864     Collective on MPI_Comm
4865 
4866    Input Parameters:
4867 +    comm - the communicators the parallel matrix will live on
4868 .    seqmat - the input sequential matrices
4869 .    m - number of local rows (or PETSC_DECIDE)
4870 .    n - number of local columns (or PETSC_DECIDE)
4871 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4872 
4873    Output Parameter:
4874 .    mpimat - the parallel matrix generated
4875 
4876     Level: advanced
4877 
4878    Notes:
4879      The dimensions of the sequential matrix in each processor MUST be the same.
4880      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4881      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4882 @*/
4883 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4884 {
4885   PetscErrorCode ierr;
4886   PetscMPIInt    size;
4887 
4888   PetscFunctionBegin;
4889   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4890   if (size == 1) {
4891     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4892     if (scall == MAT_INITIAL_MATRIX) {
4893       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4894     } else {
4895       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4896     }
4897     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4898     PetscFunctionReturn(0);
4899   }
4900   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4901   if (scall == MAT_INITIAL_MATRIX) {
4902     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4903   }
4904   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4905   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4906   PetscFunctionReturn(0);
4907 }
4908 
4909 #undef __FUNCT__
4910 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4911 /*@
4912      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4913           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4914           with MatGetSize()
4915 
4916     Not Collective
4917 
4918    Input Parameters:
4919 +    A - the matrix
4920 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4921 
4922    Output Parameter:
4923 .    A_loc - the local sequential matrix generated
4924 
4925     Level: developer
4926 
4927 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4928 
4929 @*/
4930 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4931 {
4932   PetscErrorCode ierr;
4933   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4934   Mat_SeqAIJ     *mat,*a,*b;
4935   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4936   MatScalar      *aa,*ba,*cam;
4937   PetscScalar    *ca;
4938   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4939   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4940   PetscBool      match;
4941   MPI_Comm       comm;
4942   PetscMPIInt    size;
4943 
4944   PetscFunctionBegin;
4945   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4946   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4947   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4949   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4950 
4951   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4952   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4953   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4954   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4955   aa = a->a; ba = b->a;
4956   if (scall == MAT_INITIAL_MATRIX) {
4957     if (size == 1) {
4958       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4959       PetscFunctionReturn(0);
4960     }
4961 
4962     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4963     ci[0] = 0;
4964     for (i=0; i<am; i++) {
4965       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4966     }
4967     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4968     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4969     k    = 0;
4970     for (i=0; i<am; i++) {
4971       ncols_o = bi[i+1] - bi[i];
4972       ncols_d = ai[i+1] - ai[i];
4973       /* off-diagonal portion of A */
4974       for (jo=0; jo<ncols_o; jo++) {
4975         col = cmap[*bj];
4976         if (col >= cstart) break;
4977         cj[k]   = col; bj++;
4978         ca[k++] = *ba++;
4979       }
4980       /* diagonal portion of A */
4981       for (j=0; j<ncols_d; j++) {
4982         cj[k]   = cstart + *aj++;
4983         ca[k++] = *aa++;
4984       }
4985       /* off-diagonal portion of A */
4986       for (j=jo; j<ncols_o; j++) {
4987         cj[k]   = cmap[*bj++];
4988         ca[k++] = *ba++;
4989       }
4990     }
4991     /* put together the new matrix */
4992     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4993     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4994     /* Since these are PETSc arrays, change flags to free them as necessary. */
4995     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4996     mat->free_a  = PETSC_TRUE;
4997     mat->free_ij = PETSC_TRUE;
4998     mat->nonew   = 0;
4999   } else if (scall == MAT_REUSE_MATRIX) {
5000     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5001     ci = mat->i; cj = mat->j; cam = mat->a;
5002     for (i=0; i<am; i++) {
5003       /* off-diagonal portion of A */
5004       ncols_o = bi[i+1] - bi[i];
5005       for (jo=0; jo<ncols_o; jo++) {
5006         col = cmap[*bj];
5007         if (col >= cstart) break;
5008         *cam++ = *ba++; bj++;
5009       }
5010       /* diagonal portion of A */
5011       ncols_d = ai[i+1] - ai[i];
5012       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5013       /* off-diagonal portion of A */
5014       for (j=jo; j<ncols_o; j++) {
5015         *cam++ = *ba++; bj++;
5016       }
5017     }
5018   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5019   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5025 /*@C
5026      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5027 
5028     Not Collective
5029 
5030    Input Parameters:
5031 +    A - the matrix
5032 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5033 -    row, col - index sets of rows and columns to extract (or NULL)
5034 
5035    Output Parameter:
5036 .    A_loc - the local sequential matrix generated
5037 
5038     Level: developer
5039 
5040 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5041 
5042 @*/
5043 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5044 {
5045   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5046   PetscErrorCode ierr;
5047   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5048   IS             isrowa,iscola;
5049   Mat            *aloc;
5050   PetscBool      match;
5051 
5052   PetscFunctionBegin;
5053   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5054   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5055   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5056   if (!row) {
5057     start = A->rmap->rstart; end = A->rmap->rend;
5058     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5059   } else {
5060     isrowa = *row;
5061   }
5062   if (!col) {
5063     start = A->cmap->rstart;
5064     cmap  = a->garray;
5065     nzA   = a->A->cmap->n;
5066     nzB   = a->B->cmap->n;
5067     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5068     ncols = 0;
5069     for (i=0; i<nzB; i++) {
5070       if (cmap[i] < start) idx[ncols++] = cmap[i];
5071       else break;
5072     }
5073     imark = i;
5074     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5075     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5076     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5077   } else {
5078     iscola = *col;
5079   }
5080   if (scall != MAT_INITIAL_MATRIX) {
5081     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5082     aloc[0] = *A_loc;
5083   }
5084   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5085   *A_loc = aloc[0];
5086   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5087   if (!row) {
5088     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5089   }
5090   if (!col) {
5091     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5092   }
5093   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5094   PetscFunctionReturn(0);
5095 }
5096 
5097 #undef __FUNCT__
5098 #define __FUNCT__ "MatGetBrowsOfAcols"
5099 /*@C
5100     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5101 
5102     Collective on Mat
5103 
5104    Input Parameters:
5105 +    A,B - the matrices in mpiaij format
5106 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5107 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5108 
5109    Output Parameter:
5110 +    rowb, colb - index sets of rows and columns of B to extract
5111 -    B_seq - the sequential matrix generated
5112 
5113     Level: developer
5114 
5115 @*/
5116 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5117 {
5118   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5119   PetscErrorCode ierr;
5120   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5121   IS             isrowb,iscolb;
5122   Mat            *bseq=NULL;
5123 
5124   PetscFunctionBegin;
5125   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5126     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5127   }
5128   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5129 
5130   if (scall == MAT_INITIAL_MATRIX) {
5131     start = A->cmap->rstart;
5132     cmap  = a->garray;
5133     nzA   = a->A->cmap->n;
5134     nzB   = a->B->cmap->n;
5135     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5136     ncols = 0;
5137     for (i=0; i<nzB; i++) {  /* row < local row index */
5138       if (cmap[i] < start) idx[ncols++] = cmap[i];
5139       else break;
5140     }
5141     imark = i;
5142     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5143     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5144     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5145     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5146   } else {
5147     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5148     isrowb  = *rowb; iscolb = *colb;
5149     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5150     bseq[0] = *B_seq;
5151   }
5152   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5153   *B_seq = bseq[0];
5154   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5155   if (!rowb) {
5156     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5157   } else {
5158     *rowb = isrowb;
5159   }
5160   if (!colb) {
5161     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5162   } else {
5163     *colb = iscolb;
5164   }
5165   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5166   PetscFunctionReturn(0);
5167 }
5168 
5169 #undef __FUNCT__
5170 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5171 /*
5172     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5173     of the OFF-DIAGONAL portion of local A
5174 
5175     Collective on Mat
5176 
5177    Input Parameters:
5178 +    A,B - the matrices in mpiaij format
5179 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5180 
5181    Output Parameter:
5182 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5183 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5184 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5185 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5186 
5187     Level: developer
5188 
5189 */
5190 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5191 {
5192   VecScatter_MPI_General *gen_to,*gen_from;
5193   PetscErrorCode         ierr;
5194   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5195   Mat_SeqAIJ             *b_oth;
5196   VecScatter             ctx =a->Mvctx;
5197   MPI_Comm               comm;
5198   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5199   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5200   PetscScalar            *rvalues,*svalues;
5201   MatScalar              *b_otha,*bufa,*bufA;
5202   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5203   MPI_Request            *rwaits = NULL,*swaits = NULL;
5204   MPI_Status             *sstatus,rstatus;
5205   PetscMPIInt            jj,size;
5206   PetscInt               *cols,sbs,rbs;
5207   PetscScalar            *vals;
5208 
5209   PetscFunctionBegin;
5210   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5211   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5212   if (size == 1) PetscFunctionReturn(0);
5213 
5214   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5215     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5216   }
5217   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5218   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5219 
5220   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5221   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5222   rvalues  = gen_from->values; /* holds the length of receiving row */
5223   svalues  = gen_to->values;   /* holds the length of sending row */
5224   nrecvs   = gen_from->n;
5225   nsends   = gen_to->n;
5226 
5227   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5228   srow    = gen_to->indices;    /* local row index to be sent */
5229   sstarts = gen_to->starts;
5230   sprocs  = gen_to->procs;
5231   sstatus = gen_to->sstatus;
5232   sbs     = gen_to->bs;
5233   rstarts = gen_from->starts;
5234   rprocs  = gen_from->procs;
5235   rbs     = gen_from->bs;
5236 
5237   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5238   if (scall == MAT_INITIAL_MATRIX) {
5239     /* i-array */
5240     /*---------*/
5241     /*  post receives */
5242     for (i=0; i<nrecvs; i++) {
5243       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5244       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5245       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5246     }
5247 
5248     /* pack the outgoing message */
5249     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5250 
5251     sstartsj[0] = 0;
5252     rstartsj[0] = 0;
5253     len         = 0; /* total length of j or a array to be sent */
5254     k           = 0;
5255     for (i=0; i<nsends; i++) {
5256       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5257       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5258       for (j=0; j<nrows; j++) {
5259         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5260         for (l=0; l<sbs; l++) {
5261           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5262 
5263           rowlen[j*sbs+l] = ncols;
5264 
5265           len += ncols;
5266           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5267         }
5268         k++;
5269       }
5270       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5271 
5272       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5273     }
5274     /* recvs and sends of i-array are completed */
5275     i = nrecvs;
5276     while (i--) {
5277       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5278     }
5279     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5280 
5281     /* allocate buffers for sending j and a arrays */
5282     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5283     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5284 
5285     /* create i-array of B_oth */
5286     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5287 
5288     b_othi[0] = 0;
5289     len       = 0; /* total length of j or a array to be received */
5290     k         = 0;
5291     for (i=0; i<nrecvs; i++) {
5292       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5293       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5294       for (j=0; j<nrows; j++) {
5295         b_othi[k+1] = b_othi[k] + rowlen[j];
5296         len        += rowlen[j]; k++;
5297       }
5298       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5299     }
5300 
5301     /* allocate space for j and a arrrays of B_oth */
5302     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5303     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5304 
5305     /* j-array */
5306     /*---------*/
5307     /*  post receives of j-array */
5308     for (i=0; i<nrecvs; i++) {
5309       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5310       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5311     }
5312 
5313     /* pack the outgoing message j-array */
5314     k = 0;
5315     for (i=0; i<nsends; i++) {
5316       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5317       bufJ  = bufj+sstartsj[i];
5318       for (j=0; j<nrows; j++) {
5319         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5320         for (ll=0; ll<sbs; ll++) {
5321           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5322           for (l=0; l<ncols; l++) {
5323             *bufJ++ = cols[l];
5324           }
5325           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5326         }
5327       }
5328       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5329     }
5330 
5331     /* recvs and sends of j-array are completed */
5332     i = nrecvs;
5333     while (i--) {
5334       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5335     }
5336     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5337   } else if (scall == MAT_REUSE_MATRIX) {
5338     sstartsj = *startsj_s;
5339     rstartsj = *startsj_r;
5340     bufa     = *bufa_ptr;
5341     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5342     b_otha   = b_oth->a;
5343   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5344 
5345   /* a-array */
5346   /*---------*/
5347   /*  post receives of a-array */
5348   for (i=0; i<nrecvs; i++) {
5349     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5350     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5351   }
5352 
5353   /* pack the outgoing message a-array */
5354   k = 0;
5355   for (i=0; i<nsends; i++) {
5356     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5357     bufA  = bufa+sstartsj[i];
5358     for (j=0; j<nrows; j++) {
5359       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5360       for (ll=0; ll<sbs; ll++) {
5361         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5362         for (l=0; l<ncols; l++) {
5363           *bufA++ = vals[l];
5364         }
5365         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5366       }
5367     }
5368     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5369   }
5370   /* recvs and sends of a-array are completed */
5371   i = nrecvs;
5372   while (i--) {
5373     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5374   }
5375   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5376   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5377 
5378   if (scall == MAT_INITIAL_MATRIX) {
5379     /* put together the new matrix */
5380     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5381 
5382     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5383     /* Since these are PETSc arrays, change flags to free them as necessary. */
5384     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5385     b_oth->free_a  = PETSC_TRUE;
5386     b_oth->free_ij = PETSC_TRUE;
5387     b_oth->nonew   = 0;
5388 
5389     ierr = PetscFree(bufj);CHKERRQ(ierr);
5390     if (!startsj_s || !bufa_ptr) {
5391       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5392       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5393     } else {
5394       *startsj_s = sstartsj;
5395       *startsj_r = rstartsj;
5396       *bufa_ptr  = bufa;
5397     }
5398   }
5399   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5400   PetscFunctionReturn(0);
5401 }
5402 
5403 #undef __FUNCT__
5404 #define __FUNCT__ "MatGetCommunicationStructs"
5405 /*@C
5406   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5407 
5408   Not Collective
5409 
5410   Input Parameters:
5411 . A - The matrix in mpiaij format
5412 
5413   Output Parameter:
5414 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5415 . colmap - A map from global column index to local index into lvec
5416 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5417 
5418   Level: developer
5419 
5420 @*/
5421 #if defined(PETSC_USE_CTABLE)
5422 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5423 #else
5424 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5425 #endif
5426 {
5427   Mat_MPIAIJ *a;
5428 
5429   PetscFunctionBegin;
5430   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5431   PetscValidPointer(lvec, 2);
5432   PetscValidPointer(colmap, 3);
5433   PetscValidPointer(multScatter, 4);
5434   a = (Mat_MPIAIJ*) A->data;
5435   if (lvec) *lvec = a->lvec;
5436   if (colmap) *colmap = a->colmap;
5437   if (multScatter) *multScatter = a->Mvctx;
5438   PetscFunctionReturn(0);
5439 }
5440 
5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5442 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5443 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5444 #if defined(PETSC_HAVE_ELEMENTAL)
5445 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5446 #endif
5447 
5448 #undef __FUNCT__
5449 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5450 /*
5451     Computes (B'*A')' since computing B*A directly is untenable
5452 
5453                n                       p                          p
5454         (              )       (              )         (                  )
5455       m (      A       )  *  n (       B      )   =   m (         C        )
5456         (              )       (              )         (                  )
5457 
5458 */
5459 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5460 {
5461   PetscErrorCode ierr;
5462   Mat            At,Bt,Ct;
5463 
5464   PetscFunctionBegin;
5465   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5466   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5467   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5468   ierr = MatDestroy(&At);CHKERRQ(ierr);
5469   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5470   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5471   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5472   PetscFunctionReturn(0);
5473 }
5474 
5475 #undef __FUNCT__
5476 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5477 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5478 {
5479   PetscErrorCode ierr;
5480   PetscInt       m=A->rmap->n,n=B->cmap->n;
5481   Mat            Cmat;
5482 
5483   PetscFunctionBegin;
5484   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5485   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5486   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5487   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5488   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5489   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5490   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5491   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5492 
5493   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5494 
5495   *C = Cmat;
5496   PetscFunctionReturn(0);
5497 }
5498 
5499 /* ----------------------------------------------------------------*/
5500 #undef __FUNCT__
5501 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5502 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5503 {
5504   PetscErrorCode ierr;
5505 
5506   PetscFunctionBegin;
5507   if (scall == MAT_INITIAL_MATRIX) {
5508     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5509     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5510     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5511   }
5512   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5513   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5514   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5515   PetscFunctionReturn(0);
5516 }
5517 
5518 #if defined(PETSC_HAVE_MUMPS)
5519 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5520 #endif
5521 #if defined(PETSC_HAVE_PASTIX)
5522 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_SUPERLU_DIST)
5525 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_CLIQUE)
5528 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5529 #endif
5530 
5531 /*MC
5532    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5533 
5534    Options Database Keys:
5535 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5536 
5537   Level: beginner
5538 
5539 .seealso: MatCreateAIJ()
5540 M*/
5541 
5542 #undef __FUNCT__
5543 #define __FUNCT__ "MatCreate_MPIAIJ"
5544 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5545 {
5546   Mat_MPIAIJ     *b;
5547   PetscErrorCode ierr;
5548   PetscMPIInt    size;
5549 
5550   PetscFunctionBegin;
5551   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5552 
5553   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5554   B->data       = (void*)b;
5555   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5556   B->assembled  = PETSC_FALSE;
5557   B->insertmode = NOT_SET_VALUES;
5558   b->size       = size;
5559 
5560   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5561 
5562   /* build cache for off array entries formed */
5563   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5564 
5565   b->donotstash  = PETSC_FALSE;
5566   b->colmap      = 0;
5567   b->garray      = 0;
5568   b->roworiented = PETSC_TRUE;
5569 
5570   /* stuff used for matrix vector multiply */
5571   b->lvec  = NULL;
5572   b->Mvctx = NULL;
5573 
5574   /* stuff for MatGetRow() */
5575   b->rowindices   = 0;
5576   b->rowvalues    = 0;
5577   b->getrowactive = PETSC_FALSE;
5578 
5579   /* flexible pointer used in CUSP/CUSPARSE classes */
5580   b->spptr = NULL;
5581 
5582 #if defined(PETSC_HAVE_MUMPS)
5583   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5584 #endif
5585 #if defined(PETSC_HAVE_PASTIX)
5586   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_SUPERLU_DIST)
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5590 #endif
5591 #if defined(PETSC_HAVE_CLIQUE)
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5593 #endif
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5604 #if defined(PETSC_HAVE_ELEMENTAL)
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5606 #endif
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5611   PetscFunctionReturn(0);
5612 }
5613 
5614 #undef __FUNCT__
5615 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5616 /*@
5617      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5618          and "off-diagonal" part of the matrix in CSR format.
5619 
5620    Collective on MPI_Comm
5621 
5622    Input Parameters:
5623 +  comm - MPI communicator
5624 .  m - number of local rows (Cannot be PETSC_DECIDE)
5625 .  n - This value should be the same as the local size used in creating the
5626        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5627        calculated if N is given) For square matrices n is almost always m.
5628 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5629 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5630 .   i - row indices for "diagonal" portion of matrix
5631 .   j - column indices
5632 .   a - matrix values
5633 .   oi - row indices for "off-diagonal" portion of matrix
5634 .   oj - column indices
5635 -   oa - matrix values
5636 
5637    Output Parameter:
5638 .   mat - the matrix
5639 
5640    Level: advanced
5641 
5642    Notes:
5643        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5644        must free the arrays once the matrix has been destroyed and not before.
5645 
5646        The i and j indices are 0 based
5647 
5648        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5649 
5650        This sets local rows and cannot be used to set off-processor values.
5651 
5652        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5653        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5654        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5655        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5656        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5657        communication if it is known that only local entries will be set.
5658 
5659 .keywords: matrix, aij, compressed row, sparse, parallel
5660 
5661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5662           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5663 @*/
5664 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5665 {
5666   PetscErrorCode ierr;
5667   Mat_MPIAIJ     *maij;
5668 
5669   PetscFunctionBegin;
5670   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5671   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5672   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5673   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5674   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5675   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5676   maij = (Mat_MPIAIJ*) (*mat)->data;
5677 
5678   (*mat)->preallocated = PETSC_TRUE;
5679 
5680   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5681   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5682 
5683   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5684   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5685 
5686   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5688   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690 
5691   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5694   PetscFunctionReturn(0);
5695 }
5696 
5697 /*
5698     Special version for direct calls from Fortran
5699 */
5700 #include <petsc-private/fortranimpl.h>
5701 
5702 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5703 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5705 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5706 #endif
5707 
5708 /* Change these macros so can be used in void function */
5709 #undef CHKERRQ
5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5711 #undef SETERRQ2
5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5713 #undef SETERRQ3
5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5715 #undef SETERRQ
5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5717 
5718 #undef __FUNCT__
5719 #define __FUNCT__ "matsetvaluesmpiaij_"
5720 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5721 {
5722   Mat            mat  = *mmat;
5723   PetscInt       m    = *mm, n = *mn;
5724   InsertMode     addv = *maddv;
5725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5726   PetscScalar    value;
5727   PetscErrorCode ierr;
5728 
5729   MatCheckPreallocated(mat,1);
5730   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5731 
5732 #if defined(PETSC_USE_DEBUG)
5733   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5734 #endif
5735   {
5736     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5737     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5738     PetscBool roworiented = aij->roworiented;
5739 
5740     /* Some Variables required in the macro */
5741     Mat        A                 = aij->A;
5742     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5743     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5744     MatScalar  *aa               = a->a;
5745     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5746     Mat        B                 = aij->B;
5747     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5748     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5749     MatScalar  *ba               = b->a;
5750 
5751     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5752     PetscInt  nonew = a->nonew;
5753     MatScalar *ap1,*ap2;
5754 
5755     PetscFunctionBegin;
5756     for (i=0; i<m; i++) {
5757       if (im[i] < 0) continue;
5758 #if defined(PETSC_USE_DEBUG)
5759       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5760 #endif
5761       if (im[i] >= rstart && im[i] < rend) {
5762         row      = im[i] - rstart;
5763         lastcol1 = -1;
5764         rp1      = aj + ai[row];
5765         ap1      = aa + ai[row];
5766         rmax1    = aimax[row];
5767         nrow1    = ailen[row];
5768         low1     = 0;
5769         high1    = nrow1;
5770         lastcol2 = -1;
5771         rp2      = bj + bi[row];
5772         ap2      = ba + bi[row];
5773         rmax2    = bimax[row];
5774         nrow2    = bilen[row];
5775         low2     = 0;
5776         high2    = nrow2;
5777 
5778         for (j=0; j<n; j++) {
5779           if (roworiented) value = v[i*n+j];
5780           else value = v[i+j*m];
5781           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5782           if (in[j] >= cstart && in[j] < cend) {
5783             col = in[j] - cstart;
5784             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5785           } else if (in[j] < 0) continue;
5786 #if defined(PETSC_USE_DEBUG)
5787           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5788 #endif
5789           else {
5790             if (mat->was_assembled) {
5791               if (!aij->colmap) {
5792                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5793               }
5794 #if defined(PETSC_USE_CTABLE)
5795               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5796               col--;
5797 #else
5798               col = aij->colmap[in[j]] - 1;
5799 #endif
5800               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5801                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5802                 col  =  in[j];
5803                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5804                 B     = aij->B;
5805                 b     = (Mat_SeqAIJ*)B->data;
5806                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5807                 rp2   = bj + bi[row];
5808                 ap2   = ba + bi[row];
5809                 rmax2 = bimax[row];
5810                 nrow2 = bilen[row];
5811                 low2  = 0;
5812                 high2 = nrow2;
5813                 bm    = aij->B->rmap->n;
5814                 ba    = b->a;
5815               }
5816             } else col = in[j];
5817             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5818           }
5819         }
5820       } else if (!aij->donotstash) {
5821         if (roworiented) {
5822           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5823         } else {
5824           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5825         }
5826       }
5827     }
5828   }
5829   PetscFunctionReturnVoid();
5830 }
5831 
5832