xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ab784542e6f84c4a78686d3d530acfe9a8f3f81a)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
20    enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
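/*
   A minimal usage sketch (illustrative only, not part of this file), following the
   recommendation above to call both preallocation routines so the same code works on
   one or many processes.  Here comm, M, N, d_nz and o_nz are placeholders supplied by
   the caller.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);            used when comm has one process
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);  used when comm has several processes
*/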
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
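/*
   A small runtime-selection sketch (illustrative only): with MatSetFromOptions() in the
   code path, the type can be chosen from the options database, e.g. with -mat_type aijcrl
   as documented above; comm, M and N are placeholders supplied by the caller.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   picks up -mat_type aijcrl (or any other registered type)
*/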
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
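/*
   A usage sketch (illustrative only) based on the signature below: gmat is the sequential
   matrix held on rank 0 of comm, m is this process's desired local row count, and dmat
   receives the distributed matrix.  A second call with MAT_REUSE_MATRIX only moves the
   numerical values over again, as handled in the reuse branch of the routine.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/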
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine the number of diagonal and off-diagonal nonzeros in each row */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine the number of diagonal and off-diagonal nonzeros in each row */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0 */
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it, it is not scalable (each processor
367 has an order-N integer array) but is fast to access.
368 */
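/*
   A lookup sketch (illustrative only) mirroring how the colmap built below is consulted in
   MatSetValues_MPIAIJ(): gcol is a global column index and lcol becomes the local column
   index in the off-diagonal block, or negative if gcol is not present.

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/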
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES mode */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled; if so we must
707      also disassemble ourselves, in order that we may reassemble. */
708   /*
709      if the nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled, so we can skip this step
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
730   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
731 
732   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
733   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
734     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
735     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
742 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
743 {
744   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
749   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatZeroRows_MPIAIJ"
755 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
756 {
757   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
758   PetscInt      *owners = A->rmap->range;
759   PetscInt       n      = A->rmap->n;
760   PetscSF        sf;
761   PetscInt      *lrows;
762   PetscSFNode   *rrows;
763   PetscInt       r, p = 0, len = 0;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   /* Create SF where leaves are input rows and roots are owned rows */
768   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
769   for (r = 0; r < n; ++r) lrows[r] = -1;
770   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
771   for (r = 0; r < N; ++r) {
772     const PetscInt idx   = rows[r];
773     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
774     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
775       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
776     }
777     if (A->nooffproczerorows) {
778       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
779       lrows[len++] = idx - owners[p];
780     } else {
781       rrows[r].rank = p;
782       rrows[r].index = rows[r] - owners[p];
783     }
784   }
785   if (!A->nooffproczerorows) {
786     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
787     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
788     /* Collect flags for rows to be zeroed */
789     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
790     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
791     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
792     /* Compress and put in row numbers */
793     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
794   }
795   /* fix right hand side if needed */
796   if (x && b) {
797     const PetscScalar *xx;
798     PetscScalar       *bb;
799 
800     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
801     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
802     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
803     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
804     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
805   }
806   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
807   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
809     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
810   } else if (diag != 0.0) {
811     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
813     for (r = 0; r < len; ++r) {
814       const PetscInt row = lrows[r] + A->rmap->rstart;
815       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
816     }
817     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819   } else {
820     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
821   }
822   ierr = PetscFree(lrows);CHKERRQ(ierr);
823 
824   /* only change matrix nonzero state if pattern was allowed to be changed */
825   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
826     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
827     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
834 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
835 {
836   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
837   PetscErrorCode    ierr;
838   PetscMPIInt       n = A->rmap->n;
839   PetscInt          i,j,r,m,p = 0,len = 0;
840   PetscInt          *lrows,*owners = A->rmap->range;
841   PetscSFNode       *rrows;
842   PetscSF           sf;
843   const PetscScalar *xx;
844   PetscScalar       *bb,*mask;
845   Vec               xmask,lmask;
846   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
847   const PetscInt    *aj, *ii,*ridx;
848   PetscScalar       *aa;
849 
850   PetscFunctionBegin;
851   /* Create SF where leaves are input rows and roots are owned rows */
852   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
853   for (r = 0; r < n; ++r) lrows[r] = -1;
854   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
855   for (r = 0; r < N; ++r) {
856     const PetscInt idx   = rows[r];
857     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
858     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
859       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
860     }
861     rrows[r].rank  = p;
862     rrows[r].index = rows[r] - owners[p];
863   }
864   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
865   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
866   /* Collect flags for rows to be zeroed */
867   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
868   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
870   /* Compress and put in row numbers */
871   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
872   /* zero diagonal part of matrix */
873   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
874   /* handle off diagonal part of matrix */
875   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
876   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
877   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
878   for (i=0; i<len; i++) bb[lrows[i]] = 1;
879   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
880   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
883   if (x) {
884     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
887     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
888   }
889   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
890   /* remove zeroed rows of off diagonal matrix */
891   ii = aij->i;
892   for (i=0; i<len; i++) {
893     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
894   }
895   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
896   if (aij->compressedrow.use) {
897     m    = aij->compressedrow.nrows;
898     ii   = aij->compressedrow.i;
899     ridx = aij->compressedrow.rindex;
900     for (i=0; i<m; i++) {
901       n  = ii[i+1] - ii[i];
902       aj = aij->j + ii[i];
903       aa = aij->a + ii[i];
904 
905       for (j=0; j<n; j++) {
906         if (PetscAbsScalar(mask[*aj])) {
907           if (b) bb[*ridx] -= *aa*xx[*aj];
908           *aa = 0.0;
909         }
910         aa++;
911         aj++;
912       }
913       ridx++;
914     }
915   } else { /* do not use compressed row format */
916     m = l->B->rmap->n;
917     for (i=0; i<m; i++) {
918       n  = ii[i+1] - ii[i];
919       aj = aij->j + ii[i];
920       aa = aij->a + ii[i];
921       for (j=0; j<n; j++) {
922         if (PetscAbsScalar(mask[*aj])) {
923           if (b) bb[i] -= *aa*xx[*aj];
924           *aa = 0.0;
925         }
926         aa++;
927         aj++;
928       }
929     }
930   }
931   if (x) {
932     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
933     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
934   }
935   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
936   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
937   ierr = PetscFree(lrows);CHKERRQ(ierr);
938 
939   /* only change matrix nonzero state if pattern was allowed to be changed */
940   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
941     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
942     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
943   }
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMult_MPIAIJ"
949 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953   PetscInt       nt;
954 
955   PetscFunctionBegin;
956   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
957   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
958   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
960   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatMultAdd_MPIAIJ"
979 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
987   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
994 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   PetscBool      merged;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1002   /* do nondiagonal part */
1003   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004   if (!merged) {
1005     /* send it on its way */
1006     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1007     /* do local part */
1008     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1009     /* receive remote parts: note this assumes the values are not actually */
1010     /* added into yy until the next line */
1011     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1012   } else {
1013     /* do local part */
1014     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1015     /* send it on its way */
1016     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017     /* values actually were received in the Begin() but we need to call this nop */
1018     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1025 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1026 {
1027   MPI_Comm       comm;
1028   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1029   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1030   IS             Me,Notme;
1031   PetscErrorCode ierr;
1032   PetscInt       M,N,first,last,*notme,i;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: the diagonal blocks must be transposes of each other */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1039   if (!*f) PetscFunctionReturn(0);
1040   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1042   if (size == 1) PetscFunctionReturn(0);
1043 
1044   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1045   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1046   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1047   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1048   for (i=0; i<first; i++) notme[i] = i;
1049   for (i=last; i<M; i++) notme[i-last+first] = i;
1050   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1051   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1052   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1053   Aoff = Aoffs[0];
1054   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1055   Boff = Boffs[0];
1056   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1057   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1059   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1061   ierr = PetscFree(notme);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   /* send it on its way */
1076   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   /* do local part */
1078   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1079   /* receive remote parts */
1080   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 /*
1085   This only works correctly for square matrices where the subblock A->A is the
1086    diagonal block
1087 */
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1090 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1091 {
1092   PetscErrorCode ierr;
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094 
1095   PetscFunctionBegin;
1096   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1097   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1098   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatScale_MPIAIJ"
1104 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1105 {
1106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1111   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatDestroy_MPIAIJ"
1117 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1118 {
1119   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1120   PetscErrorCode ierr;
1121 
1122   PetscFunctionBegin;
1123 #if defined(PETSC_USE_LOG)
1124   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1125 #endif
1126   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1127   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1128   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1130 #if defined(PETSC_USE_CTABLE)
1131   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1132 #else
1133   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1134 #endif
1135   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1136   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1137   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1138   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1139   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1140   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1141 
1142   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1151 #if defined(PETSC_HAVE_ELEMENTAL)
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1153 #endif
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1159 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1160 {
1161   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1162   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1163   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1164   PetscErrorCode ierr;
1165   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1166   int            fd;
1167   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1168   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1169   PetscScalar    *column_values;
1170   PetscInt       message_count,flowcontrolcount;
1171   FILE           *file;
1172 
1173   PetscFunctionBegin;
1174   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1175   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1176   nz   = A->nz + B->nz;
1177   if (!rank) {
1178     header[0] = MAT_FILE_CLASSID;
1179     header[1] = mat->rmap->N;
1180     header[2] = mat->cmap->N;
1181 
1182     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1183     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1184     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1185     /* get largest number of rows any processor has */
1186     rlen  = mat->rmap->n;
1187     range = mat->rmap->range;
1188     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1189   } else {
1190     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1191     rlen = mat->rmap->n;
1192   }
1193 
1194   /* load up the local row counts */
1195   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1196   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1197 
1198   /* store the row lengths to the file */
1199   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1200   if (!rank) {
1201     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1202     for (i=1; i<size; i++) {
1203       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1204       rlen = range[i+1] - range[i];
1205       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1206       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1207     }
1208     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1209   } else {
1210     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1211     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1213   }
1214   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1215 
1216   /* load up the local column indices */
1217   nzmax = nz; /* processor 0 needs as much space as the largest processor needs */
1218   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1220   cnt   = 0;
1221   for (i=0; i<mat->rmap->n; i++) {
1222     for (j=B->i[i]; j<B->i[i+1]; j++) {
1223       if ((col = garray[B->j[j]]) > cstart) break;
1224       column_indices[cnt++] = col;
1225     }
1226     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1227     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1228   }
1229   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1230 
1231   /* store the column indices to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     MPI_Status status;
1235     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1238       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1239       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1240       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1251 
1252   /* load up the local column values */
1253   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1254   cnt  = 0;
1255   for (i=0; i<mat->rmap->n; i++) {
1256     for (j=B->i[i]; j<B->i[i+1]; j++) {
1257       if (garray[B->j[j]] > cstart) break;
1258       column_values[cnt++] = B->a[j];
1259     }
1260     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1261     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1262   }
1263   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1264 
1265   /* store the column values to the file */
1266   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1267   if (!rank) {
1268     MPI_Status status;
1269     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1270     for (i=1; i<size; i++) {
1271       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1272       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1273       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1274       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_values);CHKERRQ(ierr);
1285 
1286   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1287   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1288   PetscFunctionReturn(0);
1289 }
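
/*
   Editor's note: illustrative usage sketch, not part of the original source.  Writing an
   assembled MATMPIAIJ matrix to the PETSc binary format dispatches to the parallel binary
   viewer routine above when the communicator has more than one process.  The matrix A is
   assumed to be already assembled; the file name "matrix.dat" is only an example.

.vb
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
.ve
*/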
1290 
1291 #include <petscdraw.h>
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1294 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1295 {
1296   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1297   PetscErrorCode    ierr;
1298   PetscMPIInt       rank = aij->rank,size = aij->size;
1299   PetscBool         isdraw,iascii,isbinary;
1300   PetscViewer       sviewer;
1301   PetscViewerFormat format;
1302 
1303   PetscFunctionBegin;
1304   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1305   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1306   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1307   if (iascii) {
1308     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1309     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1310       MatInfo   info;
1311       PetscBool inodes;
1312 
1313       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1314       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1315       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1317       if (!inodes) {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1322                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1323       }
1324       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1328       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1331       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1332       PetscFunctionReturn(0);
1333     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1334       PetscInt inodecount,inodelimit,*inodes;
1335       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1336       if (inodes) {
1337         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1338       } else {
1339         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1340       }
1341       PetscFunctionReturn(0);
1342     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1343       PetscFunctionReturn(0);
1344     }
1345   } else if (isbinary) {
1346     if (size == 1) {
1347       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1348       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1351     }
1352     PetscFunctionReturn(0);
1353   } else if (isdraw) {
1354     PetscDraw draw;
1355     PetscBool isnull;
1356     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1357     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1358   }
1359 
1360   {
1361     /* assemble the entire matrix onto first processor. */
1362     Mat        A;
1363     Mat_SeqAIJ *Aloc;
1364     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1365     MatScalar  *a;
1366 
1367     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1368     if (!rank) {
1369       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1370     } else {
1371       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1372     }
1373     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1374     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1375     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1376     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1377     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1378 
1379     /* copy over the A part */
1380     Aloc = (Mat_SeqAIJ*)aij->A->data;
1381     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1382     row  = mat->rmap->rstart;
1383     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1384     for (i=0; i<m; i++) {
1385       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1386       row++;
1387       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1388     }
1389     aj = Aloc->j;
1390     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1391 
1392     /* copy over the B part */
1393     Aloc = (Mat_SeqAIJ*)aij->B->data;
1394     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1395     row  = mat->rmap->rstart;
1396     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1397     ct   = cols;
1398     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1399     for (i=0; i<m; i++) {
1400       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1401       row++;
1402       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1403     }
1404     ierr = PetscFree(ct);CHKERRQ(ierr);
1405     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1407     /*
1408        Every process must participate in the call that draws the matrix because the
1409        graphics waits are synchronized across all processes that share the PetscDraw object
1410     */
1411     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1412     if (!rank) {
1413       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1414     }
1415     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1416     ierr = MatDestroy(&A);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatView_MPIAIJ"
1423 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1424 {
1425   PetscErrorCode ierr;
1426   PetscBool      iascii,isdraw,issocket,isbinary;
1427 
1428   PetscFunctionBegin;
1429   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1433   if (iascii || isdraw || isbinary || issocket) {
1434     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatSOR_MPIAIJ"
1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1442 {
1443   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1444   PetscErrorCode ierr;
1445   Vec            bb1 = 0;
1446   PetscBool      hasop;
1447 
1448   PetscFunctionBegin;
1449   if (flag == SOR_APPLY_UPPER) {
1450     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451     PetscFunctionReturn(0);
1452   }
1453 
1454   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1455     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1456   }
1457 
1458   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463 
1464     while (its--) {
1465       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467 
1468       /* update rhs: bb1 = bb - B*x */
1469       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1470       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1471 
1472       /* local sweep */
1473       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1474     }
1475   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1476     if (flag & SOR_ZERO_INITIAL_GUESS) {
1477       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1478       its--;
1479     }
1480     while (its--) {
1481       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483 
1484       /* update rhs: bb1 = bb - B*x */
1485       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1487 
1488       /* local sweep */
1489       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1490     }
1491   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1492     if (flag & SOR_ZERO_INITIAL_GUESS) {
1493       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1494       its--;
1495     }
1496     while (its--) {
1497       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499 
1500       /* update rhs: bb1 = bb - B*x */
1501       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1502       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1503 
1504       /* local sweep */
1505       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1506     }
1507   } else if (flag & SOR_EISENSTAT) {
1508     Vec xx1;
1509 
1510     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1511     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1512 
1513     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515     if (!mat->diag) {
1516       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1517       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1518     }
1519     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1520     if (hasop) {
1521       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1522     } else {
1523       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1524     }
1525     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1526 
1527     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1528 
1529     /* local sweep */
1530     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1531     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1532     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1533   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1534 
1535   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
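
/*
   Editor's note: illustrative usage sketch, not part of the original source.  Only the
   SOR_LOCAL_* variants are supported in parallel: each outer iteration scatters the ghost
   values of x, folds the off-process coupling into the right-hand side (bb1 = bb - B*x),
   and then runs the sequential SOR kernel on the diagonal block.  The sketch below assumes
   an assembled MATMPIAIJ matrix A and uses MatSOR() directly as a smoother.

.vb
     Vec b,x;
     ierr = MatCreateVecs(A,&x,&b);CHKERRQ(ierr);
     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
.ve
*/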
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatPermute_MPIAIJ"
1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1542 {
1543   Mat            aA,aB,Aperm;
1544   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1545   PetscScalar    *aa,*ba;
1546   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1547   PetscSF        rowsf,sf;
1548   IS             parcolp = NULL;
1549   PetscBool      done;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1554   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1555   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1556   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1557 
1558   /* Invert row permutation to find out where my rows should go */
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1562   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1563   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1565 
1566   /* Invert column permutation to find out where my columns should go */
1567   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1568   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1569   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1570   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1571   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1574 
1575   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1576   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1577   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1578 
1579   /* Find out where my gcols should go */
1580   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1581   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1588 
1589   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1590   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1591   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt row = rdest[i],rowner;
1594     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1595     for (j=ai[i]; j<ai[i+1]; j++) {
1596       PetscInt cowner,col = cdest[aj[j]];
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601     for (j=bi[i]; j<bi[i+1]; j++) {
1602       PetscInt cowner,col = gcdest[bj[j]];
1603       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1604       if (rowner == cowner) dnnz[i]++;
1605       else onnz[i]++;
1606     }
1607   }
1608   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1613 
1614   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1615   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1616   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1617   for (i=0; i<m; i++) {
1618     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1619     PetscInt j0,rowlen;
1620     rowlen = ai[i+1] - ai[i];
1621     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed arrays, so process the row in batches of at most m entries */
1622       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1623       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1624     }
1625     rowlen = bi[i+1] - bi[i];
1626     for (j0=j=0; j<rowlen; j0=j) {
1627       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1628       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1633   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1634   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1635   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1637   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1639   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1640   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1641   *B = Aperm;
1642   PetscFunctionReturn(0);
1643 }
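
/*
   Editor's note: illustrative usage sketch, not part of the original source.  rowp and colp
   are parallel index sets describing the row and column permutations (see the MatPermute()
   documentation for their exact meaning); nlocal, rowperm, and colperm below are
   hypothetical caller-provided data, and A is an assembled MATMPIAIJ matrix.

.vb
     IS  rowp,colp;
     Mat B;
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),nlocal,rowperm,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),nlocal,colperm,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
.ve
*/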
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1647 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1648 {
1649   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1650   Mat            A    = mat->A,B = mat->B;
1651   PetscErrorCode ierr;
1652   PetscReal      isend[5],irecv[5];
1653 
1654   PetscFunctionBegin;
1655   info->block_size = 1.0;
1656   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1657 
1658   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1659   isend[3] = info->memory;  isend[4] = info->mallocs;
1660 
1661   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1662 
1663   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1664   isend[3] += info->memory;  isend[4] += info->mallocs;
1665   if (flag == MAT_LOCAL) {
1666     info->nz_used      = isend[0];
1667     info->nz_allocated = isend[1];
1668     info->nz_unneeded  = isend[2];
1669     info->memory       = isend[3];
1670     info->mallocs      = isend[4];
1671   } else if (flag == MAT_GLOBAL_MAX) {
1672     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   } else if (flag == MAT_GLOBAL_SUM) {
1680     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1681 
1682     info->nz_used      = irecv[0];
1683     info->nz_allocated = irecv[1];
1684     info->nz_unneeded  = irecv[2];
1685     info->memory       = irecv[3];
1686     info->mallocs      = irecv[4];
1687   }
1688   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1689   info->fill_ratio_needed = 0;
1690   info->factor_mallocs    = 0;
1691   PetscFunctionReturn(0);
1692 }
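
/*
   Editor's note: illustrative usage sketch, not part of the original source.  The routine
   above sums the diagonal (A) and off-diagonal (B) contributions on each process and, for
   the MAT_GLOBAL_* flags, reduces them over the communicator.  A is assumed assembled.

.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
.ve
*/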
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatSetOption_MPIAIJ"
1696 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1697 {
1698   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   switch (op) {
1703   case MAT_NEW_NONZERO_LOCATIONS:
1704   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1705   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1706   case MAT_KEEP_NONZERO_PATTERN:
1707   case MAT_NEW_NONZERO_LOCATION_ERR:
1708   case MAT_USE_INODES:
1709   case MAT_IGNORE_ZERO_ENTRIES:
1710     MatCheckPreallocated(A,1);
1711     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1712     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_ROW_ORIENTED:
1715     a->roworiented = flg;
1716 
1717     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1718     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1719     break;
1720   case MAT_NEW_DIAGONALS:
1721     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1722     break;
1723   case MAT_IGNORE_OFF_PROC_ENTRIES:
1724     a->donotstash = flg;
1725     break;
1726   case MAT_SPD:
1727     A->spd_set = PETSC_TRUE;
1728     A->spd     = flg;
1729     if (flg) {
1730       A->symmetric                  = PETSC_TRUE;
1731       A->structurally_symmetric     = PETSC_TRUE;
1732       A->symmetric_set              = PETSC_TRUE;
1733       A->structurally_symmetric_set = PETSC_TRUE;
1734     }
1735     break;
1736   case MAT_SYMMETRIC:
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     break;
1739   case MAT_STRUCTURALLY_SYMMETRIC:
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     break;
1742   case MAT_HERMITIAN:
1743     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_SYMMETRY_ETERNAL:
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   default:
1749     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1750   }
1751   PetscFunctionReturn(0);
1752 }
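
/*
   Editor's note: illustrative usage sketch, not part of the original source.  Most options
   are simply forwarded to the diagonal and off-diagonal blocks, while
   MAT_IGNORE_OFF_PROC_ENTRIES and MAT_SPD only set flags on the parallel matrix itself.
   Typical calls, assuming an existing MATMPIAIJ matrix A:

.vb
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
.ve
*/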
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatGetRow_MPIAIJ"
1756 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1757 {
1758   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1759   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1760   PetscErrorCode ierr;
1761   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1762   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1763   PetscInt       *cmap,*idx_p;
1764 
1765   PetscFunctionBegin;
1766   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1767   mat->getrowactive = PETSC_TRUE;
1768 
1769   if (!mat->rowvalues && (idx || v)) {
1770     /*
1771         allocate enough space to hold information from the longest row.
1772     */
1773     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1774     PetscInt   max = 1,tmp;
1775     for (i=0; i<matin->rmap->n; i++) {
1776       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1777       if (max < tmp) max = tmp;
1778     }
1779     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1780   }
1781 
1782   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1783   lrow = row - rstart;
1784 
1785   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1786   if (!v)   {pvA = 0; pvB = 0;}
1787   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1788   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1789   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1790   nztot = nzA + nzB;
1791 
1792   cmap = mat->garray;
1793   if (v  || idx) {
1794     if (nztot) {
1795       /* Sort by increasing column numbers, assuming A and B already sorted */
1796       PetscInt imark = -1;
1797       if (v) {
1798         *v = v_p = mat->rowvalues;
1799         for (i=0; i<nzB; i++) {
1800           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1801           else break;
1802         }
1803         imark = i;
1804         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1805         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1806       }
1807       if (idx) {
1808         *idx = idx_p = mat->rowindices;
1809         if (imark > -1) {
1810           for (i=0; i<imark; i++) {
1811             idx_p[i] = cmap[cworkB[i]];
1812           }
1813         } else {
1814           for (i=0; i<nzB; i++) {
1815             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1816             else break;
1817           }
1818           imark = i;
1819         }
1820         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1821         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1822       }
1823     } else {
1824       if (idx) *idx = 0;
1825       if (v)   *v   = 0;
1826     }
1827   }
1828   *nz  = nztot;
1829   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1830   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1836 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1837 {
1838   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1839 
1840   PetscFunctionBegin;
1841   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1842   aij->getrowactive = PETSC_FALSE;
1843   PetscFunctionReturn(0);
1844 }
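
/*
   Editor's note: illustrative usage sketch, not part of the original source.  MatGetRow()
   on a parallel AIJ matrix merges the diagonal and off-diagonal parts of one locally owned
   row into column-sorted global indices; every MatGetRow() must be paired with a
   MatRestoreRow() before the next row is requested.  A is an assembled MATMPIAIJ matrix.

.vb
     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
.ve
*/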
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatNorm_MPIAIJ"
1848 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1849 {
1850   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1851   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1852   PetscErrorCode ierr;
1853   PetscInt       i,j,cstart = mat->cmap->rstart;
1854   PetscReal      sum = 0.0;
1855   MatScalar      *v;
1856 
1857   PetscFunctionBegin;
1858   if (aij->size == 1) {
1859     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1860   } else {
1861     if (type == NORM_FROBENIUS) {
1862       v = amat->a;
1863       for (i=0; i<amat->nz; i++) {
1864         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1865       }
1866       v = bmat->a;
1867       for (i=0; i<bmat->nz; i++) {
1868         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1869       }
1870       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1871       *norm = PetscSqrtReal(*norm);
1872     } else if (type == NORM_1) { /* max column norm */
1873       PetscReal *tmp,*tmp2;
1874       PetscInt  *jj,*garray = aij->garray;
1875       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1876       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1877       *norm = 0.0;
1878       v     = amat->a; jj = amat->j;
1879       for (j=0; j<amat->nz; j++) {
1880         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1881       }
1882       v = bmat->a; jj = bmat->j;
1883       for (j=0; j<bmat->nz; j++) {
1884         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1885       }
1886       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1887       for (j=0; j<mat->cmap->N; j++) {
1888         if (tmp2[j] > *norm) *norm = tmp2[j];
1889       }
1890       ierr = PetscFree(tmp);CHKERRQ(ierr);
1891       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1892     } else if (type == NORM_INFINITY) { /* max row norm */
1893       PetscReal ntemp = 0.0;
1894       for (j=0; j<aij->A->rmap->n; j++) {
1895         v   = amat->a + amat->i[j];
1896         sum = 0.0;
1897         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1898           sum += PetscAbsScalar(*v); v++;
1899         }
1900         v = bmat->a + bmat->i[j];
1901         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1902           sum += PetscAbsScalar(*v); v++;
1903         }
1904         if (sum > ntemp) ntemp = sum;
1905       }
1906       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1908   }
1909   PetscFunctionReturn(0);
1910 }
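
/*
   Editor's note: illustrative usage sketch, not part of the original source.  Only the
   Frobenius, one (max column sum), and infinity (max row sum) norms are implemented for
   MATMPIAIJ; NORM_2 raises an error, as the code above shows.  A is assumed assembled.

.vb
     PetscReal fnorm,onenorm,infnorm;
     ierr = MatNorm(A,NORM_FROBENIUS,&fnorm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&onenorm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&infnorm);CHKERRQ(ierr);
.ve
*/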
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatTranspose_MPIAIJ"
1914 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1915 {
1916   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1917   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1918   PetscErrorCode ierr;
1919   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1920   PetscInt       cstart = A->cmap->rstart,ncol;
1921   Mat            B;
1922   MatScalar      *array;
1923 
1924   PetscFunctionBegin;
1925   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1926 
1927   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1928   ai = Aloc->i; aj = Aloc->j;
1929   bi = Bloc->i; bj = Bloc->j;
1930   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1931     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1932     PetscSFNode          *oloc;
1933     PETSC_UNUSED PetscSF sf;
1934 
1935     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1936     /* compute d_nnz for preallocation */
1937     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (i=0; i<ai[ma]; i++) {
1939       d_nnz[aj[i]]++;
1940       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1941     }
1942     /* compute local off-diagonal contributions */
1943     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1944     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1945     /* map those to global */
1946     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1947     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1948     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1949     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1950     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1951     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1952     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1953 
1954     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1955     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1956     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1957     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1958     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1959     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1960   } else {
1961     B    = *matout;
1962     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1963     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1964   }
1965 
1966   /* copy over the A part */
1967   array = Aloc->a;
1968   row   = A->rmap->rstart;
1969   for (i=0; i<ma; i++) {
1970     ncol = ai[i+1]-ai[i];
1971     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1972     row++;
1973     array += ncol; aj += ncol;
1974   }
1975   aj = Aloc->j;
1976   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1977 
1978   /* copy over the B part */
1979   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1980   array = Bloc->a;
1981   row   = A->rmap->rstart;
1982   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1983   cols_tmp = cols;
1984   for (i=0; i<mb; i++) {
1985     ncol = bi[i+1]-bi[i];
1986     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1987     row++;
1988     array += ncol; cols_tmp += ncol;
1989   }
1990   ierr = PetscFree(cols);CHKERRQ(ierr);
1991 
1992   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1995     *matout = B;
1996   } else {
1997     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1998   }
1999   PetscFunctionReturn(0);
2000 }
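
/*
   Editor's note: illustrative usage sketch, not part of the original source.  With
   MAT_INITIAL_MATRIX the routine above preallocates the transpose from the column counts of
   the local blocks; in-place transposition (passing &A itself with MAT_REUSE_MATRIX) is only
   allowed for square matrices.  A is an assembled MATMPIAIJ matrix.

.vb
     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&At);CHKERRQ(ierr);
.ve
*/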
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2005 {
2006   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2007   Mat            a    = aij->A,b = aij->B;
2008   PetscErrorCode ierr;
2009   PetscInt       s1,s2,s3;
2010 
2011   PetscFunctionBegin;
2012   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2013   if (rr) {
2014     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2015     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2016     /* Overlap communication with computation. */
2017     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018   }
2019   if (ll) {
2020     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2021     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2022     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2023   }
2024   /* scale the diagonal block */
2025   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2026 
2027   if (rr) {
2028     /* Do a scatter end and then right scale the off-diagonal block */
2029     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2030     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2038 {
2039   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2040   PetscErrorCode ierr;
2041 
2042   PetscFunctionBegin;
2043   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatEqual_MPIAIJ"
2049 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2050 {
2051   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2052   Mat            a,b,c,d;
2053   PetscBool      flg;
2054   PetscErrorCode ierr;
2055 
2056   PetscFunctionBegin;
2057   a = matA->A; b = matA->B;
2058   c = matB->A; d = matB->B;
2059 
2060   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2061   if (flg) {
2062     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2063   }
2064   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatCopy_MPIAIJ"
2070 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2071 {
2072   PetscErrorCode ierr;
2073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2074   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2075 
2076   PetscFunctionBegin;
2077   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2078   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2079     /* because of the column compression in the off-process part of the matrix a->B,
2080        the number of columns in a->B and b->B may differ, hence we cannot call
2081        MatCopy() directly on the two parts.  If need be, a copy more efficient than
2082        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2083        then copying the submatrices */
2084     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2085   } else {
2086     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2087     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2088   }
2089   PetscFunctionReturn(0);
2090 }
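
/*
   Editor's note: illustrative usage sketch, not part of the original source.  The fast path
   copies the diagonal and off-diagonal blocks directly and requires both matrices to share
   the same nonzero pattern (and copy implementation); otherwise MatCopy_Basic() is used.
   A is an assembled MATMPIAIJ matrix.

.vb
     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
.ve
*/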
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatSetUp_MPIAIJ"
2094 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2095 {
2096   PetscErrorCode ierr;
2097 
2098   PetscFunctionBegin;
2099   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 /*
2104    Computes the number of nonzeros per row needed for preallocation when X and Y
2105    have different nonzero structure.
2106 */
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2109 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2110 {
2111   PetscInt       i,j,k,nzx,nzy;
2112 
2113   PetscFunctionBegin;
2114   /* Set the number of nonzeros in the new matrix */
2115   for (i=0; i<m; i++) {
2116     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2117     nzx = xi[i+1] - xi[i];
2118     nzy = yi[i+1] - yi[i];
2119     nnz[i] = 0;
2120     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2121       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2122       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2123       nnz[i]++;
2124     }
2125     for (; k<nzy; k++) nnz[i]++;
2126   }
2127   PetscFunctionReturn(0);
2128 }
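
/*
   Editor's note: illustrative example, not part of the original source.  For one row, if X
   has global columns {0,3,7} and Y has global columns {3,5}, the merge above counts each of
   0, 3, 5, 7 exactly once, giving nnz[i] = 4; the shared column 3 is not counted twice.
*/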
2129 
2130 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2133 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2134 {
2135   PetscErrorCode ierr;
2136   PetscInt       m = Y->rmap->N;
2137   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2138   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatAXPY_MPIAIJ"
2147 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2151   PetscBLASInt   bnz,one=1;
2152   Mat_SeqAIJ     *x,*y;
2153 
2154   PetscFunctionBegin;
2155   if (str == SAME_NONZERO_PATTERN) {
2156     PetscScalar alpha = a;
2157     x    = (Mat_SeqAIJ*)xx->A->data;
2158     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2159     y    = (Mat_SeqAIJ*)yy->A->data;
2160     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2161     x    = (Mat_SeqAIJ*)xx->B->data;
2162     y    = (Mat_SeqAIJ*)yy->B->data;
2163     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2164     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2165     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2166   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of those of Y */
2167     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2168   } else {
2169     Mat      B;
2170     PetscInt *nnz_d,*nnz_o;
2171     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2172     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2173     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2174     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2175     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2176     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2177     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2178     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2179     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2180     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2181     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2182     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2183     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2184     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2185   }
2186   PetscFunctionReturn(0);
2187 }
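
/*
   Editor's note: illustrative usage sketch, not part of the original source.  With
   SAME_NONZERO_PATTERN the update runs as two BLAS axpy calls on the local blocks; with
   DIFFERENT_NONZERO_PATTERN a new matrix is preallocated from the merged patterns and then
   replaces Y via MatHeaderReplace().  The call below computes Y = Y + 2.0*X for assembled
   MATMPIAIJ matrices X and Y.

.vb
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
.ve
*/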
2188 
2189 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatConjugate_MPIAIJ"
2193 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2194 {
2195 #if defined(PETSC_USE_COMPLEX)
2196   PetscErrorCode ierr;
2197   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2198 
2199   PetscFunctionBegin;
2200   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2201   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2202 #else
2203   PetscFunctionBegin;
2204 #endif
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatRealPart_MPIAIJ"
2210 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2211 {
2212   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2217   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2223 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2224 {
2225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2226   PetscErrorCode ierr;
2227 
2228   PetscFunctionBegin;
2229   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2230   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 #if defined(PETSC_HAVE_PBGL)
2235 
2236 #include <boost/parallel/mpi/bsp_process_group.hpp>
2237 #include <boost/graph/distributed/ilu_default_graph.hpp>
2238 #include <boost/graph/distributed/ilu_0_block.hpp>
2239 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2240 #include <boost/graph/distributed/petsc/interface.hpp>
2241 #include <boost/multi_array.hpp>
2242 #include <boost/parallel/distributed_property_map.hpp>
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2246 /*
2247   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2248 */
2249 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2250 {
2251   namespace petsc = boost::distributed::petsc;
2252 
2253   namespace graph_dist = boost::graph::distributed;
2254   using boost::graph::distributed::ilu_default::process_group_type;
2255   using boost::graph::ilu_permuted;
2256 
2257   PetscBool      row_identity, col_identity;
2258   PetscContainer c;
2259   PetscInt       m, n, M, N;
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2264   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2265   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2266   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2267 
2268   process_group_type pg;
2269   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2270   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2271   lgraph_type& level_graph = *lgraph_p;
2272   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2273 
2274   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2275   ilu_permuted(level_graph);
2276 
2277   /* put together the new matrix */
2278   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2279   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2280   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2281   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2282   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2283   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2284   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2285   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286 
2287   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2288   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2289   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2290   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2296 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2297 {
2298   PetscFunctionBegin;
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatSolve_MPIAIJ"
2304 /*
2305   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2306 */
2307 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2308 {
2309   namespace graph_dist = boost::graph::distributed;
2310 
2311   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2312   lgraph_type    *lgraph_p;
2313   PetscContainer c;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2318   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2319   ierr = VecCopy(b, x);CHKERRQ(ierr);
2320 
2321   PetscScalar *array_x;
2322   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2323   PetscInt sx;
2324   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2325 
2326   PetscScalar *array_b;
2327   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2328   PetscInt sb;
2329   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2330 
2331   lgraph_type& level_graph = *lgraph_p;
2332   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2333 
2334   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2335   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2336   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2337 
2338   typedef boost::iterator_property_map<array_ref_type::iterator,
2339                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2340   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2341   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2342 
2343   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2344   PetscFunctionReturn(0);
2345 }
2346 #endif
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2350 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2351 {
2352   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2353   PetscErrorCode ierr;
2354   PetscInt       i,*idxb = 0;
2355   PetscScalar    *va,*vb;
2356   Vec            vtmp;
2357 
2358   PetscFunctionBegin;
2359   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2360   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2361   if (idx) {
2362     for (i=0; i<A->rmap->n; i++) {
2363       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2364     }
2365   }
2366 
2367   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2368   if (idx) {
2369     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2370   }
2371   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2372   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2373 
2374   for (i=0; i<A->rmap->n; i++) {
2375     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2376       va[i] = vb[i];
2377       if (idx) idx[i] = a->garray[idxb[i]];
2378     }
2379   }
2380 
2381   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2382   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2383   ierr = PetscFree(idxb);CHKERRQ(ierr);
2384   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
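
/*
   Editor's note: illustrative usage sketch, not part of the original source.  v receives,
   for each locally owned row, the entry of largest absolute value; idx (optional) receives
   the corresponding global column index, translated through garray for off-diagonal entries.
   A is an assembled MATMPIAIJ matrix.

.vb
     Vec      rowmax;
     PetscInt m,*loc;
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
.ve
*/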
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,*idxb = 0;
2395   PetscScalar    *va,*vb;
2396   Vec            vtmp;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401   if (idx) {
2402     for (i=0; i<A->rmap->n; i++) {
2403       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404     }
2405   }
2406 
2407   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408   if (idx) {
2409     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2410   }
2411   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413 
2414   for (i=0; i<A->rmap->n; i++) {
2415     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2416       va[i] = vb[i];
2417       if (idx) idx[i] = a->garray[idxb[i]];
2418     }
2419   }
2420 
2421   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423   ierr = PetscFree(idxb);CHKERRQ(ierr);
2424   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2430 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2431 {
2432   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2433   PetscInt       n      = A->rmap->n;
2434   PetscInt       cstart = A->cmap->rstart;
2435   PetscInt       *cmap  = mat->garray;
2436   PetscInt       *diagIdx, *offdiagIdx;
2437   Vec            diagV, offdiagV;
2438   PetscScalar    *a, *diagA, *offdiagA;
2439   PetscInt       r;
2440   PetscErrorCode ierr;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2446   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2447   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2448   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2449   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2450   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   for (r = 0; r < n; ++r) {
2452     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2453       a[r]   = diagA[r];
2454       idx[r] = cstart + diagIdx[r];
2455     } else {
2456       a[r]   = offdiagA[r];
2457       idx[r] = cmap[offdiagIdx[r]];
2458     }
2459   }
2460   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2461   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2463   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2464   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2465   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2471 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2472 {
2473   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2474   PetscInt       n      = A->rmap->n;
2475   PetscInt       cstart = A->cmap->rstart;
2476   PetscInt       *cmap  = mat->garray;
2477   PetscInt       *diagIdx, *offdiagIdx;
2478   Vec            diagV, offdiagV;
2479   PetscScalar    *a, *diagA, *offdiagA;
2480   PetscInt       r;
2481   PetscErrorCode ierr;
2482 
2483   PetscFunctionBegin;
2484   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2485   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2487   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2488   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2489   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2490   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2491   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2492   for (r = 0; r < n; ++r) {
2493     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2494       a[r]   = diagA[r];
2495       idx[r] = cstart + diagIdx[r];
2496     } else {
2497       a[r]   = offdiagA[r];
2498       idx[r] = cmap[offdiagIdx[r]];
2499     }
2500   }
2501   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2502   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2503   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2504   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2505   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2506   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2512 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2513 {
2514   PetscErrorCode ierr;
2515   Mat            *dummy;
2516 
2517   PetscFunctionBegin;
2518   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2519   *newmat = *dummy;
2520   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2526 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2527 {
2528   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2529   PetscErrorCode ierr;
2530 
2531   PetscFunctionBegin;
2532   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2538 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2539 {
2540   PetscErrorCode ierr;
2541   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2542 
2543   PetscFunctionBegin;
2544   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2545   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2546   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2547   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 /* -------------------------------------------------------------------*/
2552 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2553                                        MatGetRow_MPIAIJ,
2554                                        MatRestoreRow_MPIAIJ,
2555                                        MatMult_MPIAIJ,
2556                                 /* 4*/ MatMultAdd_MPIAIJ,
2557                                        MatMultTranspose_MPIAIJ,
2558                                        MatMultTransposeAdd_MPIAIJ,
2559 #if defined(PETSC_HAVE_PBGL)
2560                                        MatSolve_MPIAIJ,
2561 #else
2562                                        0,
2563 #endif
2564                                        0,
2565                                        0,
2566                                 /*10*/ 0,
2567                                        0,
2568                                        0,
2569                                        MatSOR_MPIAIJ,
2570                                        MatTranspose_MPIAIJ,
2571                                 /*15*/ MatGetInfo_MPIAIJ,
2572                                        MatEqual_MPIAIJ,
2573                                        MatGetDiagonal_MPIAIJ,
2574                                        MatDiagonalScale_MPIAIJ,
2575                                        MatNorm_MPIAIJ,
2576                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2577                                        MatAssemblyEnd_MPIAIJ,
2578                                        MatSetOption_MPIAIJ,
2579                                        MatZeroEntries_MPIAIJ,
2580                                 /*24*/ MatZeroRows_MPIAIJ,
2581                                        0,
2582 #if defined(PETSC_HAVE_PBGL)
2583                                        0,
2584 #else
2585                                        0,
2586 #endif
2587                                        0,
2588                                        0,
2589                                 /*29*/ MatSetUp_MPIAIJ,
2590 #if defined(PETSC_HAVE_PBGL)
2591                                        0,
2592 #else
2593                                        0,
2594 #endif
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*34*/ MatDuplicate_MPIAIJ,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                 /*39*/ MatAXPY_MPIAIJ,
2604                                        MatGetSubMatrices_MPIAIJ,
2605                                        MatIncreaseOverlap_MPIAIJ,
2606                                        MatGetValues_MPIAIJ,
2607                                        MatCopy_MPIAIJ,
2608                                 /*44*/ MatGetRowMax_MPIAIJ,
2609                                        MatScale_MPIAIJ,
2610                                        0,
2611                                        MatDiagonalSet_MPIAIJ,
2612                                        MatZeroRowsColumns_MPIAIJ,
2613                                 /*49*/ MatSetRandom_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2619                                        0,
2620                                        MatSetUnfactored_MPIAIJ,
2621                                        MatPermute_MPIAIJ,
2622                                        0,
2623                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2624                                        MatDestroy_MPIAIJ,
2625                                        MatView_MPIAIJ,
2626                                        0,
2627                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2628                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2629                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2634                                        MatGetRowMinAbs_MPIAIJ,
2635                                        0,
2636                                        MatSetColoring_MPIAIJ,
2637                                        0,
2638                                        MatSetValuesAdifor_MPIAIJ,
2639                                 /*75*/ MatFDColoringApply_AIJ,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        MatFindZeroDiagonals_MPIAIJ,
2644                                 /*80*/ 0,
2645                                        0,
2646                                        0,
2647                                 /*83*/ MatLoad_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2654                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2655                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2656                                        MatPtAP_MPIAIJ_MPIAIJ,
2657                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2658                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*99*/ 0,
2664                                        0,
2665                                        0,
2666                                        MatConjugate_MPIAIJ,
2667                                        0,
2668                                 /*104*/MatSetValuesRow_MPIAIJ,
2669                                        MatRealPart_MPIAIJ,
2670                                        MatImaginaryPart_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                 /*109*/0,
2674                                        0,
2675                                        MatGetRowMin_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                 /*119*/0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        MatGetMultiProcBlock_MPIAIJ,
2688                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2689                                        MatGetColumnNorms_MPIAIJ,
2690                                        MatInvertBlockDiagonal_MPIAIJ,
2691                                        0,
2692                                        MatGetSubMatricesParallel_MPIAIJ,
2693                                 /*129*/0,
2694                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2695                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2696                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2697                                        0,
2698                                 /*134*/0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                 /*139*/0,
2704                                        0,
2705                                        0,
2706                                        MatFDColoringSetUp_MPIXAIJ,
2707                                        0,
2708                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2709 };
2710 
2711 /* ----------------------------------------------------------------------------------------*/
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2715 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2716 {
2717   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2722   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2728 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2729 {
2730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2731   PetscErrorCode ierr;
2732 
2733   PetscFunctionBegin;
2734   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2735   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2741 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2742 {
2743   Mat_MPIAIJ     *b;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2748   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2749   b = (Mat_MPIAIJ*)B->data;
2750 
2751   if (!B->preallocated) {
2752     /* Explicitly create 2 MATSEQAIJ matrices. */
2753     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2754     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2755     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2756     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2757     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2758     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2759     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2760     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2761     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2762     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2763   }
2764 
2765   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2766   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2767   B->preallocated = PETSC_TRUE;
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2773 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2774 {
2775   Mat            mat;
2776   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2777   PetscErrorCode ierr;
2778 
2779   PetscFunctionBegin;
2780   *newmat = 0;
2781   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2782   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2783   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2784   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2785   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2786   a       = (Mat_MPIAIJ*)mat->data;
2787 
2788   mat->factortype   = matin->factortype;
2789   mat->assembled    = PETSC_TRUE;
2790   mat->insertmode   = NOT_SET_VALUES;
2791   mat->preallocated = PETSC_TRUE;
2792 
2793   a->size         = oldmat->size;
2794   a->rank         = oldmat->rank;
2795   a->donotstash   = oldmat->donotstash;
2796   a->roworiented  = oldmat->roworiented;
2797   a->rowindices   = 0;
2798   a->rowvalues    = 0;
2799   a->getrowactive = PETSC_FALSE;
2800 
2801   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2803 
2804   if (oldmat->colmap) {
2805 #if defined(PETSC_USE_CTABLE)
2806     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2807 #else
2808     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
2809     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2810     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2811 #endif
2812   } else a->colmap = 0;
2813   if (oldmat->garray) {
2814     PetscInt len;
2815     len  = oldmat->B->cmap->n;
2816     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
2817     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2818     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2819   } else a->garray = 0;
2820 
2821   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2822   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2823   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2824   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2825   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2826   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2827   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2828   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2829   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2830   *newmat = mat;
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatLoad_MPIAIJ"
2838 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2839 {
2840   PetscScalar    *vals,*svals;
2841   MPI_Comm       comm;
2842   PetscErrorCode ierr;
2843   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2844   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2845   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2846   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2847   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2848   int            fd;
2849   PetscInt       bs = 1;
2850 
2851   PetscFunctionBegin;
2852   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2853   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2854   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2855   if (!rank) {
2856     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2857     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2858     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2859   }
2860 
2861   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2862   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2863   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2864 
2865   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2866 
2867   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2868   M    = header[1]; N = header[2];
2869   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
2870   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2871   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2872 
2873   /* If global sizes are set, check if they are consistent with that given in the file */
2874   if (sizesset) {
2875     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2876   }
2877   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
2878   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2879 
2880   /* determine ownership of all (block) rows */
2881   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2882   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2883   else m = newMat->rmap->n; /* Set by user */
2884 
2885   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
2886   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2887 
2888   /* First process needs enough room for process with most rows */
2889   if (!rank) {
2890     mmax = rowners[1];
2891     for (i=2; i<=size; i++) {
2892       mmax = PetscMax(mmax, rowners[i]);
2893     }
2894   } else mmax = -1;             /* unused, but compilers complain */
2895 
2896   rowners[0] = 0;
2897   for (i=2; i<=size; i++) {
2898     rowners[i] += rowners[i-1];
2899   }
2900   rstart = rowners[rank];
2901   rend   = rowners[rank+1];
2902 
2903   /* distribute row lengths to all processors */
2904   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2905   if (!rank) {
2906     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2907     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2908     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2909     for (j=0; j<m; j++) {
2910       procsnz[0] += ourlens[j];
2911     }
2912     for (i=1; i<size; i++) {
2913       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2914       /* calculate the number of nonzeros on each processor */
2915       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2916         procsnz[i] += rowlengths[j];
2917       }
2918       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2919     }
2920     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2921   } else {
2922     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2923   }
2924 
2925   if (!rank) {
2926     /* determine max buffer needed and allocate it */
2927     maxnz = 0;
2928     for (i=0; i<size; i++) {
2929       maxnz = PetscMax(maxnz,procsnz[i]);
2930     }
2931     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2932 
2933     /* read in my part of the matrix column indices  */
2934     nz   = procsnz[0];
2935     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2936     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2937 
2938     /* read in everyone else's rows and ship them off */
2939     for (i=1; i<size; i++) {
2940       nz   = procsnz[i];
2941       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2942       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2943     }
2944     ierr = PetscFree(cols);CHKERRQ(ierr);
2945   } else {
2946     /* determine buffer space needed for message */
2947     nz = 0;
2948     for (i=0; i<m; i++) {
2949       nz += ourlens[i];
2950     }
2951     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2952 
2953     /* receive message of column indices*/
2954     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2955   }
2956 
2957   /* determine column ownership if matrix is not square */
2958   if (N != M) {
2959     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2960     else n = newMat->cmap->n;
2961     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2962     cstart = cend - n;
2963   } else {
2964     cstart = rstart;
2965     cend   = rend;
2966     n      = cend - cstart;
2967   }
2968 
2969   /* loop over local rows, determining number of off diagonal entries */
2970   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2971   jj   = 0;
2972   for (i=0; i<m; i++) {
2973     for (j=0; j<ourlens[i]; j++) {
2974       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2975       jj++;
2976     }
2977   }
2978 
2979   for (i=0; i<m; i++) {
2980     ourlens[i] -= offlens[i];
2981   }
2982   if (!sizesset) {
2983     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2984   }
2985 
2986   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2987 
2988   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2989 
2990   for (i=0; i<m; i++) {
2991     ourlens[i] += offlens[i];
2992   }
2993 
2994   if (!rank) {
2995     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
2996 
2997     /* read in my part of the matrix numerical values  */
2998     nz   = procsnz[0];
2999     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3000 
3001     /* insert into matrix */
3002     jj      = rstart;
3003     smycols = mycols;
3004     svals   = vals;
3005     for (i=0; i<m; i++) {
3006       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3007       smycols += ourlens[i];
3008       svals   += ourlens[i];
3009       jj++;
3010     }
3011 
3012     /* read in other processors and ship out */
3013     for (i=1; i<size; i++) {
3014       nz   = procsnz[i];
3015       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3016       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3017     }
3018     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3019   } else {
3020     /* receive numeric values */
3021     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3022 
3023     /* receive message of values*/
3024     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3025 
3026     /* insert into matrix */
3027     jj      = rstart;
3028     smycols = mycols;
3029     svals   = vals;
3030     for (i=0; i<m; i++) {
3031       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3032       smycols += ourlens[i];
3033       svals   += ourlens[i];
3034       jj++;
3035     }
3036   }
3037   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3038   ierr = PetscFree(vals);CHKERRQ(ierr);
3039   ierr = PetscFree(mycols);CHKERRQ(ierr);
3040   ierr = PetscFree(rowners);CHKERRQ(ierr);
3041   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3042   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3048 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3049 {
3050   PetscErrorCode ierr;
3051   IS             iscol_local;
3052   PetscInt       csize;
3053 
3054   PetscFunctionBegin;
3055   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3056   if (call == MAT_REUSE_MATRIX) {
3057     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3058     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3059   } else {
3060     PetscInt cbs;
3061     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3062     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3063     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3064   }
3065   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3066   if (call == MAT_INITIAL_MATRIX) {
3067     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3068     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3069   }
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3076 /*
3077     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3078   process, and then the final result obtained by concatenating the local matrices.
3079   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3080 
3081   Note: This requires a sequential iscol with all indices.
3082 */
3083 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3084 {
3085   PetscErrorCode ierr;
3086   PetscMPIInt    rank,size;
3087   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3088   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3089   PetscBool      allcolumns, colflag;
3090   Mat            M,Mreuse;
3091   MatScalar      *vwork,*aa;
3092   MPI_Comm       comm;
3093   Mat_SeqAIJ     *aij;
3094 
3095   PetscFunctionBegin;
3096   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3097   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3098   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3099 
3100   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3101   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3102   if (colflag && ncol == mat->cmap->N) {
3103     allcolumns = PETSC_TRUE;
3104   } else {
3105     allcolumns = PETSC_FALSE;
3106   }
3107   if (call ==  MAT_REUSE_MATRIX) {
3108     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3109     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3110     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3111   } else {
3112     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3113   }
3114 
3115   /*
3116       m - number of local rows
3117       n - number of columns (same on all processors)
3118       rstart - first row in new global matrix generated
3119   */
3120   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3121   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3122   if (call == MAT_INITIAL_MATRIX) {
3123     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3124     ii  = aij->i;
3125     jj  = aij->j;
3126 
3127     /*
3128         Determine the number of non-zeros in the diagonal and off-diagonal
3129         portions of the matrix in order to do correct preallocation
3130     */
3131 
3132     /* first get start and end of "diagonal" columns */
3133     if (csize == PETSC_DECIDE) {
3134       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3135       if (mglobal == n) { /* square matrix */
3136         nlocal = m;
3137       } else {
3138         nlocal = n/size + ((n % size) > rank);
3139       }
3140     } else {
3141       nlocal = csize;
3142     }
3143     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3144     rstart = rend - nlocal;
3145     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3146 
3147     /* next, compute all the lengths */
3148     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3149     olens = dlens + m;
3150     for (i=0; i<m; i++) {
3151       jend = ii[i+1] - ii[i];
3152       olen = 0;
3153       dlen = 0;
3154       for (j=0; j<jend; j++) {
3155         if (*jj < rstart || *jj >= rend) olen++;
3156         else dlen++;
3157         jj++;
3158       }
3159       olens[i] = olen;
3160       dlens[i] = dlen;
3161     }
3162     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3163     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3164     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3165     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3166     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3167     ierr = PetscFree(dlens);CHKERRQ(ierr);
3168   } else {
3169     PetscInt ml,nl;
3170 
3171     M    = *newmat;
3172     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3173     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3174     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3175     /*
3176          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3177        rather than the slower MatSetValues().
3178     */
3179     M->was_assembled = PETSC_TRUE;
3180     M->assembled     = PETSC_FALSE;
3181   }
3182   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3183   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3184   ii   = aij->i;
3185   jj   = aij->j;
3186   aa   = aij->a;
3187   for (i=0; i<m; i++) {
3188     row   = rstart + i;
3189     nz    = ii[i+1] - ii[i];
3190     cwork = jj;     jj += nz;
3191     vwork = aa;     aa += nz;
3192     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3193   }
3194 
3195   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3196   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3197   *newmat = M;
3198 
3199   /* save submatrix used in processor for next request */
3200   if (call ==  MAT_INITIAL_MATRIX) {
3201     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3202     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3203   }
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 #undef __FUNCT__
3208 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3209 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3210 {
3211   PetscInt       m,cstart, cend,j,nnz,i,d;
3212   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3213   const PetscInt *JJ;
3214   PetscScalar    *values;
3215   PetscErrorCode ierr;
3216 
3217   PetscFunctionBegin;
3218   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3219 
3220   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3221   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3222   m      = B->rmap->n;
3223   cstart = B->cmap->rstart;
3224   cend   = B->cmap->rend;
3225   rstart = B->rmap->rstart;
3226 
3227   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3228 
3229 #if defined(PETSC_USE_DEBUG)
3230   for (i=0; i<m; i++) {
3231     nnz = Ii[i+1]- Ii[i];
3232     JJ  = J + Ii[i];
3233     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3234     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3235     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3236   }
3237 #endif
3238 
3239   for (i=0; i<m; i++) {
3240     nnz     = Ii[i+1]- Ii[i];
3241     JJ      = J + Ii[i];
3242     nnz_max = PetscMax(nnz_max,nnz);
3243     d       = 0;
3244     for (j=0; j<nnz; j++) {
3245       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3246     }
3247     d_nnz[i] = d;
3248     o_nnz[i] = nnz - d;
3249   }
3250   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3251   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3252 
3253   if (v) values = (PetscScalar*)v;
3254   else {
3255     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3256   }
3257 
3258   for (i=0; i<m; i++) {
3259     ii   = i + rstart;
3260     nnz  = Ii[i+1]- Ii[i];
3261     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3262   }
3263   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265 
3266   if (!v) {
3267     ierr = PetscFree(values);CHKERRQ(ierr);
3268   }
3269   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3275 /*@
3276    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3277    (the default parallel PETSc format).
3278 
3279    Collective on MPI_Comm
3280 
3281    Input Parameters:
3282 +  B - the matrix
3283 .  i - the indices into j for the start of each local row (starts with zero)
3284 .  j - the column indices for each local row (starts with zero)
3285 -  v - optional values in the matrix
3286 
3287    Level: developer
3288 
3289    Notes:
3290        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3291      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3292      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3293 
3294        The i and j indices are 0 based, and the i indices refer to positions in the local j array.
3295 
3296        The format used for the sparse matrix input is equivalent to a
3297     row-major ordering, i.e., for the following matrix the expected input data is
3298     as shown (a commented usage sketch also follows this routine):
3299 
3300         1 0 0
3301         2 0 3     P0
3302        -------
3303         4 5 6     P1
3304 
3305      Process0 [P0]: rows_owned=[0,1]
3306         i =  {0,1,3}  [size = nrow+1  = 2+1]
3307         j =  {0,0,2}  [size = nz = 3]
3308         v =  {1,2,3}  [size = nz = 3]
3309 
3310      Process1 [P1]: rows_owned=[2]
3311         i =  {0,3}    [size = nrow+1  = 1+1]
3312         j =  {0,1,2}  [size = nz = 3]
3313         v =  {4,5,6}  [size = nz = 3]
3314 
3315 .keywords: matrix, aij, compressed row, sparse, parallel
3316 
3317 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3318           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3319 @*/
3320 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3321 {
3322   PetscErrorCode ierr;
3323 
3324   PetscFunctionBegin;
3325   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3326   PetscFunctionReturn(0);
3327 }
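
/*
   Editorial illustration (not part of the PETSc sources): a minimal sketch of how the two-process
   example in the manual page above could be handed to MatMPIAIJSetPreallocationCSR().  The local
   i/j/v arrays and the assumption of exactly two MPI processes mirror the documented P0/P1 layout;
   the helper name is hypothetical, and the block is compile-guarded so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm)
{
  Mat               B;
  PetscMPIInt       rank;
  PetscErrorCode    ierr;
  /* rank 0 owns rows 0-1, rank 1 owns row 2 of the 3x3 example matrix shown above */
  const PetscInt    i0[] = {0,1,3},  i1[] = {0,3};
  const PetscInt    j0[] = {0,0,2},  j1[] = {0,1,2};
  const PetscScalar v0[] = {1,2,3},  v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  /* preallocates and fills the local rows from the CSR triplets in one collective call */
  ierr = MatMPIAIJSetPreallocationCSR(B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif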
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3331 /*@C
3332    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3333    (the default parallel PETSc format).  For good matrix assembly performance
3334    the user should preallocate the matrix storage by setting the parameters
3335    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3336    performance can be increased by more than a factor of 50.
3337 
3338    Collective on MPI_Comm
3339 
3340    Input Parameters:
3341 +  B - the matrix
3342 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3343            (same value is used for all local rows)
3344 .  d_nnz - array containing the number of nonzeros in the various rows of the
3345            DIAGONAL portion of the local submatrix (possibly different for each row)
3346            or NULL, if d_nz is used to specify the nonzero structure.
3347            The size of this array is equal to the number of local rows, i.e 'm'.
3348            For matrices that will be factored, you must leave room for (and set)
3349            the diagonal entry even if it is zero.
3350 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3351            submatrix (same value is used for all local rows).
3352 -  o_nnz - array containing the number of nonzeros in the various rows of the
3353            OFF-DIAGONAL portion of the local submatrix (possibly different for
3354            each row) or NULL, if o_nz is used to specify the nonzero
3355            structure. The size of this array is equal to the number
3356            of local rows, i.e 'm'.
3357 
3358    If the *_nnz parameter is given then the *_nz parameter is ignored
3359 
3360    The AIJ format (also called the Yale sparse matrix format or
3361    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3362    storage.  The stored row and column indices begin with zero.
3363    See Users-Manual: ch_mat for details.
3364 
3365    The parallel matrix is partitioned such that the first m0 rows belong to
3366    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3367    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3368 
3369    The DIAGONAL portion of the local submatrix of a processor can be defined
3370    as the submatrix which is obtained by extracting the part corresponding to
3371    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3372    first row that belongs to the processor, r2 is the last row belonging to
3373    this processor, and c1-c2 is the range of indices of the local part of a
3374    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3375    common case of a square matrix, the row and column ranges are the same and
3376    the DIAGONAL part is also square. The remaining portion of the local
3377    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3378 
3379    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3380 
3381    You can call MatGetInfo() to get information on how effective the preallocation was;
3382    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3383    You can also run with the option -info and look for messages with the string
3384    malloc in them to see if additional memory allocation was needed.
3385 
3386    Example usage (a commented calling sketch also follows this routine):
3387 
3388    Consider the following 8x8 matrix with 34 non-zero values, that is
3389    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3390    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3391    as follows:
3392 
3393 .vb
3394             1  2  0  |  0  3  0  |  0  4
3395     Proc0   0  5  6  |  7  0  0  |  8  0
3396             9  0 10  | 11  0  0  | 12  0
3397     -------------------------------------
3398            13  0 14  | 15 16 17  |  0  0
3399     Proc1   0 18  0  | 19 20 21  |  0  0
3400             0  0  0  | 22 23  0  | 24  0
3401     -------------------------------------
3402     Proc2  25 26 27  |  0  0 28  | 29  0
3403            30  0  0  | 31 32 33  |  0 34
3404 .ve
3405 
3406    This can be represented as a collection of submatrices as:
3407 
3408 .vb
3409       A B C
3410       D E F
3411       G H I
3412 .ve
3413 
3414    Where the submatrices A,B,C are owned by proc0, D,E,F are
3415    owned by proc1, G,H,I are owned by proc2.
3416 
3417    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3418    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3419    The 'M','N' parameters are 8,8, and have the same values on all procs.
3420 
3421    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3422    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3423    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3424    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3425    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3426    matrix, and [DF] as another SeqAIJ matrix.
3427 
3428    When d_nz, o_nz parameters are specified, d_nz storage elements are
3429    allocated for every row of the local diagonal submatrix, and o_nz
3430    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3431    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3432    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3433    In this case, the values of d_nz,o_nz are:
3434 .vb
3435      proc0 : d_nz = 2, o_nz = 2
3436      proc1 : d_nz = 3, o_nz = 2
3437      proc2 : d_nz = 1, o_nz = 4
3438 .ve
3439    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3440    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3441    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3442    34 values.
3443 
3444    When d_nnz, o_nnz parameters are specified, the storage is specified
3445    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3446    In the above case the values for d_nnz,o_nnz are:
3447 .vb
3448      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3449      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3450      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3451 .ve
3452    Here the space allocated is the sum of all the above values, i.e., 34, and
3453    hence the preallocation is perfect.
3454 
3455    Level: intermediate
3456 
3457 .keywords: matrix, aij, compressed row, sparse, parallel
3458 
3459 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3460           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3461 @*/
3462 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3463 {
3464   PetscErrorCode ierr;
3465 
3466   PetscFunctionBegin;
3467   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3468   PetscValidType(B,1);
3469   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3470   PetscFunctionReturn(0);
3471 }
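
/*
   Editorial illustration (not part of the PETSc sources): a minimal sketch of the preallocation
   call described in the manual page above, written as it would look on proc0 of the documented
   8x8 / three-process example.  The call is collective; each process would pass its own local
   sizes and nnz arrays, and only the proc0 values from the tables above are shown.  The helper
   name is hypothetical, and the block is compile-guarded so it does not affect the build.
*/
#if 0
static PetscErrorCode ExamplePreallocateProc0(MPI_Comm comm)
{
  Mat            A;
  PetscErrorCode ierr;
  const PetscInt d_nnz[] = {2,2,2};   /* nonzeros per local row in the DIAGONAL block     */
  const PetscInt o_nnz[] = {2,2,2};   /* nonzeros per local row in the OFF-DIAGONAL block */

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);                 /* proc0 owns rows 0-2 and columns 0-2 */
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() would follow here ... */
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif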
3472 
3473 #undef __FUNCT__
3474 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3475 /*@
3476      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3477          CSR format the local rows.
3478 
3479    Collective on MPI_Comm
3480 
3481    Input Parameters:
3482 +  comm - MPI communicator
3483 .  m - number of local rows (Cannot be PETSC_DECIDE)
3484 .  n - This value should be the same as the local size used in creating the
3485        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3486        calculated if N is given) For square matrices n is almost always m.
3487 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3488 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3489 .   i - row indices
3490 .   j - column indices
3491 -   a - matrix values
3492 
3493    Output Parameter:
3494 .   mat - the matrix
3495 
3496    Level: intermediate
3497 
3498    Notes:
3499        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3500      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3501      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3502 
3503        The i and j indices are 0 based, and the i indices refer to positions in the local j array.
3504 
3505        The format used for the sparse matrix input is equivalent to a
3506     row-major ordering, i.e., for the following matrix the expected input data is
3507     as shown (a commented usage sketch also follows this routine):
3508 
3509         1 0 0
3510         2 0 3     P0
3511        -------
3512         4 5 6     P1
3513 
3514      Process0 [P0]: rows_owned=[0,1]
3515         i =  {0,1,3}  [size = nrow+1  = 2+1]
3516         j =  {0,0,2}  [size = nz = 3]
3517         v =  {1,2,3}  [size = nz = 3]
3518 
3519      Process1 [P1]: rows_owned=[2]
3520         i =  {0,3}    [size = nrow+1  = 1+1]
3521         j =  {0,1,2}  [size = nz = 3]
3522         v =  {4,5,6}  [size = nz = 3]
3523 
3524 .keywords: matrix, aij, compressed row, sparse, parallel
3525 
3526 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3527           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3528 @*/
3529 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3530 {
3531   PetscErrorCode ierr;
3532 
3533   PetscFunctionBegin;
3534   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3535   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3536   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3537   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3538   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3539   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3540   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3541   PetscFunctionReturn(0);
3542 }
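
/*
   Editorial illustration (not part of the PETSc sources): a minimal sketch that hands the
   two-process CSR data from the manual page above directly to MatCreateMPIAIJWithArrays().
   It assumes exactly two MPI processes (rank 0 contributes rows 0-1, rank 1 contributes row 2);
   the helper name is hypothetical, and the block is compile-guarded so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleCreateWithArrays(MPI_Comm comm,Mat *mat)
{
  PetscMPIInt       rank;
  PetscErrorCode    ierr;
  const PetscInt    i0[] = {0,1,3},  i1[] = {0,3};
  const PetscInt    j0[] = {0,0,2},  j1[] = {0,1,2};
  const PetscScalar a0[] = {1,2,3},  a1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* the i/j/a arrays are copied internally, so stack storage as used here is fine */
  ierr = MatCreateMPIAIJWithArrays(comm,rank ? 1 : 2,PETSC_DECIDE,3,3,
                                   rank ? i1 : i0,rank ? j1 : j0,rank ? a1 : a0,mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif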
3543 
3544 #undef __FUNCT__
3545 #define __FUNCT__ "MatCreateAIJ"
3546 /*@C
3547    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3548    (the default parallel PETSc format).  For good matrix assembly performance
3549    the user should preallocate the matrix storage by setting the parameters
3550    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3551    performance can be increased by more than a factor of 50.
3552 
3553    Collective on MPI_Comm
3554 
3555    Input Parameters:
3556 +  comm - MPI communicator
3557 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3558            This value should be the same as the local size used in creating the
3559            y vector for the matrix-vector product y = Ax.
3560 .  n - This value should be the same as the local size used in creating the
3561        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3562        calculated if N is given) For square matrices n is almost always m.
3563 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3564 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3565 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3566            (same value is used for all local rows)
3567 .  d_nnz - array containing the number of nonzeros in the various rows of the
3568            DIAGONAL portion of the local submatrix (possibly different for each row)
3569            or NULL, if d_nz is used to specify the nonzero structure.
3570            The size of this array is equal to the number of local rows, i.e 'm'.
3571 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3572            submatrix (same value is used for all local rows).
3573 -  o_nnz - array containing the number of nonzeros in the various rows of the
3574            OFF-DIAGONAL portion of the local submatrix (possibly different for
3575            each row) or NULL, if o_nz is used to specify the nonzero
3576            structure. The size of this array is equal to the number
3577            of local rows, i.e 'm'.
3578 
3579    Output Parameter:
3580 .  A - the matrix
3581 
3582    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3583    MatXXXXSetPreallocation() paradigm instead of this routine directly (a commented sketch of that paradigm follows this routine).
3584    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3585 
3586    Notes:
3587    If the *_nnz parameter is given then the *_nz parameter is ignored
3588 
3589    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3590    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3591    storage requirements for this matrix.
3592 
3593    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3594    processor then it must be used on all processors that share the object for
3595    that argument.
3596 
3597    The user MUST specify either the local or global matrix dimensions
3598    (possibly both).
3599 
3600    The parallel matrix is partitioned across processors such that the
3601    first m0 rows belong to process 0, the next m1 rows belong to
3602    process 1, the next m2 rows belong to process 2, etc., where
3603    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
3604    values corresponding to an [m x N] submatrix.
3605 
3606    The columns are logically partitioned with the n0 columns belonging
3607    to the 0th partition, the next n1 columns belonging to the next
3608    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3609 
3610    The DIAGONAL portion of the local submatrix on any given processor
3611    is the submatrix corresponding to the rows and columns m,n
3612    owned by the given processor, i.e., the diagonal matrix on
3613    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3614    etc. The remaining portion of the local submatrix [m x (N-n)]
3615    constitute the OFF-DIAGONAL portion. The example below better
3616    illustrates this concept.
3617 
3618    For a square global matrix we define each processor's diagonal portion
3619    to be its local rows and the corresponding columns (a square submatrix);
3620    each processor's off-diagonal portion encompasses the remainder of the
3621    local matrix (a rectangular submatrix).
3622 
3623    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3624 
3625    When calling this routine with a single process communicator, a matrix of
3626    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3627    type of communicator, use the construction mechanism:
3628      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3629 
3630    By default, this format uses inodes (identical nodes) when possible.
3631    We search for consecutive rows with the same nonzero structure, thereby
3632    reusing matrix information to achieve increased efficiency.
3633 
3634    Options Database Keys:
3635 +  -mat_no_inode  - Do not use inodes
3636 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3637 -  -mat_aij_oneindex - Internally use indexing starting at 1
3638         rather than 0.  Note that when calling MatSetValues(),
3639         the user still MUST index entries starting at 0!
3640 
3641 
3642    Example usage:
3643 
3644    Consider the following 8x8 matrix with 34 non-zero values, that is
3645    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3646    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3647    as follows:
3648 
3649 .vb
3650             1  2  0  |  0  3  0  |  0  4
3651     Proc0   0  5  6  |  7  0  0  |  8  0
3652             9  0 10  | 11  0  0  | 12  0
3653     -------------------------------------
3654            13  0 14  | 15 16 17  |  0  0
3655     Proc1   0 18  0  | 19 20 21  |  0  0
3656             0  0  0  | 22 23  0  | 24  0
3657     -------------------------------------
3658     Proc2  25 26 27  |  0  0 28  | 29  0
3659            30  0  0  | 31 32 33  |  0 34
3660 .ve
3661 
3662    This can be represented as a collection of submatrices as:
3663 
3664 .vb
3665       A B C
3666       D E F
3667       G H I
3668 .ve
3669 
3670    Where the submatrices A,B,C are owned by proc0, D,E,F are
3671    owned by proc1, G,H,I are owned by proc2.
3672 
3673    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3674    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3675    The 'M','N' parameters are 8,8, and have the same values on all procs.
3676 
3677    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3678    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3679    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3680    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3681    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3682    matrix, and [DF] as another SeqAIJ matrix.
3683 
3684    When d_nz, o_nz parameters are specified, d_nz storage elements are
3685    allocated for every row of the local diagonal submatrix, and o_nz
3686    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3687    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3688    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3689    In this case, the values of d_nz,o_nz are:
3690 .vb
3691      proc0 : d_nz = 2, o_nz = 2
3692      proc1 : d_nz = 3, o_nz = 2
3693      proc2 : d_nz = 1, o_nz = 4
3694 .ve
3695    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3696    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3697    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3698    34 values.
3699 
3700    When d_nnz, o_nnz parameters are specified, the storage is specified
3701    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3702    In the above case the values for d_nnz,o_nnz are:
3703 .vb
3704      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3705      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3706      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3707 .ve
3708    Here the space allocated is the sum of all the above values, i.e., 34, and
3709    hence the preallocation is perfect.
3710 
3711    Level: intermediate
3712 
3713 .keywords: matrix, aij, compressed row, sparse, parallel
3714 
3715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3716           MPIAIJ, MatCreateMPIAIJWithArrays()
3717 @*/
3718 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3719 {
3720   PetscErrorCode ierr;
3721   PetscMPIInt    size;
3722 
3723   PetscFunctionBegin;
3724   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3725   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3726   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3727   if (size > 1) {
3728     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3729     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3730   } else {
3731     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3732     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3733   }
3734   PetscFunctionReturn(0);
3735 }
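
/*
   Editorial illustration (not part of the PETSc sources): a minimal sketch of the
   MatCreate()/MatSetFromOptions()/MatXXXXSetPreallocation() paradigm recommended in the manual
   page above, for a square matrix with at most 5 nonzeros per row in the diagonal block and 2 in
   the off-diagonal block.  The helper name and the nonzero bounds are illustrative assumptions;
   the block is compile-guarded so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt Nglobal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,Nglobal,Nglobal);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);                        /* AIJ by default; honors -mat_type   */
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);         /* takes effect on one process        */
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);  /* takes effect on several processes  */
  PetscFunctionReturn(0);
}
#endif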
3736 
3737 #undef __FUNCT__
3738 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3739 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3740 {
3741   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3742 
3743   PetscFunctionBegin;
3744   if (Ad)     *Ad     = a->A;
3745   if (Ao)     *Ao     = a->B;
3746   if (colmap) *colmap = a->garray;
3747   PetscFunctionReturn(0);
3748 }
3749 
3750 #undef __FUNCT__
3751 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3752 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3753 {
3754   PetscErrorCode ierr;
3755   PetscInt       i;
3756   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3757 
3758   PetscFunctionBegin;
3759   if (coloring->ctype == IS_COLORING_GLOBAL) {
3760     ISColoringValue *allcolors,*colors;
3761     ISColoring      ocoloring;
3762 
3763     /* set coloring for diagonal portion */
3764     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3765 
3766     /* set coloring for off-diagonal portion */
3767     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3768     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3769     for (i=0; i<a->B->cmap->n; i++) {
3770       colors[i] = allcolors[a->garray[i]];
3771     }
3772     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3773     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3774     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3775     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3776   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3777     ISColoringValue *colors;
3778     PetscInt        *larray;
3779     ISColoring      ocoloring;
3780 
3781     /* set coloring for diagonal portion */
3782     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
3783     for (i=0; i<a->A->cmap->n; i++) {
3784       larray[i] = i + A->cmap->rstart;
3785     }
3786     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3787     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
3788     for (i=0; i<a->A->cmap->n; i++) {
3789       colors[i] = coloring->colors[larray[i]];
3790     }
3791     ierr = PetscFree(larray);CHKERRQ(ierr);
3792     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3793     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3794     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3795 
3796     /* set coloring for off-diagonal portion */
3797     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
3798     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3799     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
3800     for (i=0; i<a->B->cmap->n; i++) {
3801       colors[i] = coloring->colors[larray[i]];
3802     }
3803     ierr = PetscFree(larray);CHKERRQ(ierr);
3804     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3805     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3806     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3807   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3808   PetscFunctionReturn(0);
3809 }
3810 
3811 #undef __FUNCT__
3812 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3813 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3814 {
3815   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3816   PetscErrorCode ierr;
3817 
3818   PetscFunctionBegin;
3819   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3820   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3821   PetscFunctionReturn(0);
3822 }
3823 
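/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the rows of the SeqAIJ matrix supplied by each
   process (in rank order) into one parallel MPIAIJ matrix. The sequential matrices must all have the
   same global number of columns N, and the local column sizes n (or PETSC_DECIDE) must sum to N.
*/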
3824 #undef __FUNCT__
3825 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3826 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3827 {
3828   PetscErrorCode ierr;
3829   PetscInt       m,N,i,rstart,nnz,Ii;
3830   PetscInt       *indx;
3831   PetscScalar    *values;
3832 
3833   PetscFunctionBegin;
3834   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3835   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3836     PetscInt       *dnz,*onz,sum,bs,cbs;
3837 
3838     if (n == PETSC_DECIDE) {
3839       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3840     }
3841     /* Check sum(n) = N */
3842     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3843     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3844 
3845     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3846     rstart -= m;
3847 
3848     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3849     for (i=0; i<m; i++) {
3850       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3851       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3852       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3853     }
3854 
3855     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3856     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3857     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3858     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3859     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3860     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3861     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3862   }
3863 
3864   /* numeric phase */
3865   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3866   for (i=0; i<m; i++) {
3867     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3868     Ii   = i + rstart;
3869     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3870     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3871   }
3872   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3873   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3874   PetscFunctionReturn(0);
3875 }
3876 
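/*
   MatFileSplit - copies the local rows of a parallel matrix (over all global columns) into a
   sequential matrix and appends it to the binary file <outfile>.<rank>, one file per process.
*/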
3877 #undef __FUNCT__
3878 #define __FUNCT__ "MatFileSplit"
3879 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3880 {
3881   PetscErrorCode    ierr;
3882   PetscMPIInt       rank;
3883   PetscInt          m,N,i,rstart,nnz;
3884   size_t            len;
3885   const PetscInt    *indx;
3886   PetscViewer       out;
3887   char              *name;
3888   Mat               B;
3889   const PetscScalar *values;
3890 
3891   PetscFunctionBegin;
3892   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3893   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3894   /* Should this be the type of the diagonal block of A? */
3895   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3896   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3897   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3898   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3899   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3900   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3901   for (i=0; i<m; i++) {
3902     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3903     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3904     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3905   }
3906   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3907   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3908 
3909   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3910   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3911   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
3912   sprintf(name,"%s.%d",outfile,rank);
3913   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3914   ierr = PetscFree(name);CHKERRQ(ierr);
3915   ierr = MatView(B,out);CHKERRQ(ierr);
3916   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3917   ierr = MatDestroy(&B);CHKERRQ(ierr);
3918   PetscFunctionReturn(0);
3919 }
3920 
3921 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3922 #undef __FUNCT__
3923 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3924 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3925 {
3926   PetscErrorCode      ierr;
3927   Mat_Merge_SeqsToMPI *merge;
3928   PetscContainer      container;
3929 
3930   PetscFunctionBegin;
3931   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3932   if (container) {
3933     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3934     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3935     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3936     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3937     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3938     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3939     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3940     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3941     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3942     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3943     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3944     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3945     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3946     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3947     ierr = PetscFree(merge);CHKERRQ(ierr);
3948     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3949   }
3950   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3951   PetscFunctionReturn(0);
3952 }
3953 
3954 #include <../src/mat/utils/freespace.h>
3955 #include <petscbt.h>
3956 
3957 #undef __FUNCT__
3958 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3959 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3960 {
3961   PetscErrorCode      ierr;
3962   MPI_Comm            comm;
3963   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3964   PetscMPIInt         size,rank,taga,*len_s;
3965   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3966   PetscInt            proc,m;
3967   PetscInt            **buf_ri,**buf_rj;
3968   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3969   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3970   MPI_Request         *s_waits,*r_waits;
3971   MPI_Status          *status;
3972   MatScalar           *aa=a->a;
3973   MatScalar           **abuf_r,*ba_i;
3974   Mat_Merge_SeqsToMPI *merge;
3975   PetscContainer      container;
3976 
3977   PetscFunctionBegin;
3978   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3979   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3980 
3981   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3982   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3983 
3984   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3985   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3986 
3987   bi     = merge->bi;
3988   bj     = merge->bj;
3989   buf_ri = merge->buf_ri;
3990   buf_rj = merge->buf_rj;
3991 
3992   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3993   owners = merge->rowmap->range;
3994   len_s  = merge->len_s;
3995 
3996   /* send and recv matrix values */
3997   /*-----------------------------*/
3998   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3999   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4000 
4001   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4002   for (proc=0,k=0; proc<size; proc++) {
4003     if (!len_s[proc]) continue;
4004     i    = owners[proc];
4005     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4006     k++;
4007   }
4008 
4009   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4010   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4011   ierr = PetscFree(status);CHKERRQ(ierr);
4012 
4013   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4014   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4015 
4016   /* insert mat values of mpimat */
4017   /*----------------------------*/
4018   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4019   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4020 
4021   for (k=0; k<merge->nrecv; k++) {
4022     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4023     nrows       = *(buf_ri_k[k]);
4024     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4025     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4026   }
4027 
4028   /* set values of ba */
4029   m = merge->rowmap->n;
4030   for (i=0; i<m; i++) {
4031     arow = owners[rank] + i;
4032     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4033     bnzi = bi[i+1] - bi[i];
4034     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4035 
4036     /* add local non-zero vals of this proc's seqmat into ba */
4037     anzi   = ai[arow+1] - ai[arow];
4038     aj     = a->j + ai[arow];
4039     aa     = a->a + ai[arow];
4040     nextaj = 0;
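    /* both aj[] and bj_i[] are sorted and every column in aj[] also appears in bj_i[],
       so scan bj_i[] forward and advance nextaj only when the columns match */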
4041     for (j=0; nextaj<anzi; j++) {
4042       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4043         ba_i[j] += aa[nextaj++];
4044       }
4045     }
4046 
4047     /* add received vals into ba */
4048     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4049       /* i-th row */
4050       if (i == *nextrow[k]) {
4051         anzi   = *(nextai[k]+1) - *nextai[k];
4052         aj     = buf_rj[k] + *(nextai[k]);
4053         aa     = abuf_r[k] + *(nextai[k]);
4054         nextaj = 0;
4055         for (j=0; nextaj<anzi; j++) {
4056           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4057             ba_i[j] += aa[nextaj++];
4058           }
4059         }
4060         nextrow[k]++; nextai[k]++;
4061       }
4062     }
4063     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4064   }
4065   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4066   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4067 
4068   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4069   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4070   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4071   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4072   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4073   PetscFunctionReturn(0);
4074 }
4075 
4076 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4077 
4078 #undef __FUNCT__
4079 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4080 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4081 {
4082   PetscErrorCode      ierr;
4083   Mat                 B_mpi;
4084   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4085   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4086   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4087   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4088   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4089   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4090   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4091   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4092   MPI_Status          *status;
4093   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4094   PetscBT             lnkbt;
4095   Mat_Merge_SeqsToMPI *merge;
4096   PetscContainer      container;
4097 
4098   PetscFunctionBegin;
4099   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4100 
4101   /* make sure it is a PETSc comm */
4102   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4103   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4104   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4105 
4106   ierr = PetscNew(&merge);CHKERRQ(ierr);
4107   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4108 
4109   /* determine row ownership */
4110   /*---------------------------------------------------------*/
4111   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4112   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4113   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4114   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4115   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4116   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4117   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4118 
4119   m      = merge->rowmap->n;
4120   owners = merge->rowmap->range;
4121 
4122   /* determine the number of messages to send, their lengths */
4123   /*---------------------------------------------------------*/
4124   len_s = merge->len_s;
4125 
4126   len          = 0; /* length of buf_si[] */
4127   merge->nsend = 0;
4128   for (proc=0; proc<size; proc++) {
4129     len_si[proc] = 0;
4130     if (proc == rank) {
4131       len_s[proc] = 0;
4132     } else {
4133       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4134       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4135     }
4136     if (len_s[proc]) {
4137       merge->nsend++;
4138       nrows = 0;
4139       for (i=owners[proc]; i<owners[proc+1]; i++) {
4140         if (ai[i+1] > ai[i]) nrows++;
4141       }
4142       len_si[proc] = 2*(nrows+1);
4143       len         += len_si[proc];
4144     }
4145   }
4146 
4147   /* determine the number and length of messages to receive for ij-structure */
4148   /*-------------------------------------------------------------------------*/
4149   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4150   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4151 
4152   /* post the Irecv of j-structure */
4153   /*-------------------------------*/
4154   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4155   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4156 
4157   /* post the Isend of j-structure */
4158   /*--------------------------------*/
4159   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4160 
4161   for (proc=0, k=0; proc<size; proc++) {
4162     if (!len_s[proc]) continue;
4163     i    = owners[proc];
4164     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4165     k++;
4166   }
4167 
4168   /* receives and sends of j-structure are complete */
4169   /*------------------------------------------------*/
4170   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4171   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4172 
4173   /* send and recv i-structure */
4174   /*---------------------------*/
4175   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4176   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4177 
4178   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4179   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4180   for (proc=0,k=0; proc<size; proc++) {
4181     if (!len_s[proc]) continue;
4182     /* form outgoing message for i-structure:
4183          buf_si[0]:                 nrows to be sent
4184                [1:nrows]:           row index (local to the destination process)
4185                [nrows+1:2*nrows+1]: i-structure index
4186     */
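    /* e.g. (illustrative) two nonempty local rows 0 and 3 with 2 and 5 nonzeros respectively
       give len_si[proc] = 6 and buf_si = {2, 0, 3, 0, 2, 7} */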
4187     /*-------------------------------------------*/
4188     nrows       = len_si[proc]/2 - 1;
4189     buf_si_i    = buf_si + nrows+1;
4190     buf_si[0]   = nrows;
4191     buf_si_i[0] = 0;
4192     nrows       = 0;
4193     for (i=owners[proc]; i<owners[proc+1]; i++) {
4194       anzi = ai[i+1] - ai[i];
4195       if (anzi) {
4196         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4197         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4198         nrows++;
4199       }
4200     }
4201     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4202     k++;
4203     buf_si += len_si[proc];
4204   }
4205 
4206   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4207   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4208 
4209   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4210   for (i=0; i<merge->nrecv; i++) {
4211     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4212   }
4213 
4214   ierr = PetscFree(len_si);CHKERRQ(ierr);
4215   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4216   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4217   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4218   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4219   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4220   ierr = PetscFree(status);CHKERRQ(ierr);
4221 
4222   /* compute a local seq matrix in each processor */
4223   /*----------------------------------------------*/
4224   /* allocate bi array and free space for accumulating nonzero column info */
4225   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4226   bi[0] = 0;
4227 
4228   /* create and initialize a linked list */
4229   nlnk = N+1;
4230   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4231 
4232   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4233   len  = ai[owners[rank+1]] - ai[owners[rank]];
4234   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4235 
4236   current_space = free_space;
4237 
4238   /* determine symbolic info for each local row */
4239   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4240 
4241   for (k=0; k<merge->nrecv; k++) {
4242     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4243     nrows       = *buf_ri_k[k];
4244     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4245     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4246   }
4247 
4248   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4249   len  = 0;
4250   for (i=0; i<m; i++) {
4251     bnzi = 0;
4252     /* add local non-zero cols of this proc's seqmat into lnk */
4253     arow  = owners[rank] + i;
4254     anzi  = ai[arow+1] - ai[arow];
4255     aj    = a->j + ai[arow];
4256     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4257     bnzi += nlnk;
4258     /* add received col data into lnk */
4259     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4260       if (i == *nextrow[k]) { /* i-th row */
4261         anzi  = *(nextai[k]+1) - *nextai[k];
4262         aj    = buf_rj[k] + *nextai[k];
4263         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4264         bnzi += nlnk;
4265         nextrow[k]++; nextai[k]++;
4266       }
4267     }
4268     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4269 
4270     /* if free space is not available, make more free space */
4271     if (current_space->local_remaining<bnzi) {
4272       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4273       nspacedouble++;
4274     }
4275     /* copy data into free space, then initialize lnk */
4276     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4277     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4278 
4279     current_space->array           += bnzi;
4280     current_space->local_used      += bnzi;
4281     current_space->local_remaining -= bnzi;
4282 
4283     bi[i+1] = bi[i] + bnzi;
4284   }
4285 
4286   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4287 
4288   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4289   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4290   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4291 
4292   /* create symbolic parallel matrix B_mpi */
4293   /*---------------------------------------*/
4294   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4295   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4296   if (n==PETSC_DECIDE) {
4297     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4298   } else {
4299     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4300   }
4301   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4302   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4303   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4304   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4305   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4306 
4307   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4308   B_mpi->assembled    = PETSC_FALSE;
4309   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4310   merge->bi           = bi;
4311   merge->bj           = bj;
4312   merge->buf_ri       = buf_ri;
4313   merge->buf_rj       = buf_rj;
4314   merge->coi          = NULL;
4315   merge->coj          = NULL;
4316   merge->owners_co    = NULL;
4317 
4318   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4319 
4320   /* attach the supporting struct to B_mpi for reuse */
4321   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4322   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4323   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4324   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4325   *mpimat = B_mpi;
4326 
4327   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4333 /*@C
4334       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4335                  matrices supplied by each processor
4336 
4337     Collective on MPI_Comm
4338 
4339    Input Parameters:
4340 +    comm - the communicator the parallel matrix will live on
4341 .    seqmat - the input sequential matrix
4342 .    m - number of local rows (or PETSC_DECIDE)
4343 .    n - number of local columns (or PETSC_DECIDE)
4344 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4345 
4346    Output Parameter:
4347 .    mpimat - the parallel matrix generated
4348 
4349     Level: advanced
4350 
4351    Notes:
4352      The dimensions of the sequential matrix on each processor MUST be the same.
4353      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4354      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
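
   Example usage (a sketch; each process contributes its own sequential matrix):
.vb
     Mat seqmat,mpimat;
     /* ... each process assembles its own MATSEQAIJ matrix seqmat with the same global dimensions ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... later, after changing the numerical values of seqmat but not its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve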
4355 @*/
4356 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4357 {
4358   PetscErrorCode ierr;
4359   PetscMPIInt    size;
4360 
4361   PetscFunctionBegin;
4362   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4363   if (size == 1) {
4364     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4365     if (scall == MAT_INITIAL_MATRIX) {
4366       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4367     } else {
4368       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4369     }
4370     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4371     PetscFunctionReturn(0);
4372   }
4373   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4374   if (scall == MAT_INITIAL_MATRIX) {
4375     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4376   }
4377   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4378   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4379   PetscFunctionReturn(0);
4380 }
4381 
4382 #undef __FUNCT__
4383 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4384 /*@
4385      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4386           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4387           with MatGetSize().
4388 
4389     Not Collective
4390 
4391    Input Parameters:
4392 +    A - the matrix
4393 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4394 
4395    Output Parameter:
4396 .    A_loc - the local sequential matrix generated
4397 
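   Example usage (a sketch):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc; after the values of A change, refresh with ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
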
4398     Level: developer
4399 
4400 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4401 
4402 @*/
4403 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4404 {
4405   PetscErrorCode ierr;
4406   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4407   Mat_SeqAIJ     *mat,*a,*b;
4408   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4409   MatScalar      *aa,*ba,*cam;
4410   PetscScalar    *ca;
4411   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4412   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4413   PetscBool      match;
4414   MPI_Comm       comm;
4415   PetscMPIInt    size;
4416 
4417   PetscFunctionBegin;
4418   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4419   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4420   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4421   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4422   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4423 
4424   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4425   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4426   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4427   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4428   aa = a->a; ba = b->a;
4429   if (scall == MAT_INITIAL_MATRIX) {
4430     if (size == 1) {
4431       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4432       PetscFunctionReturn(0);
4433     }
4434 
4435     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4436     ci[0] = 0;
4437     for (i=0; i<am; i++) {
4438       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4439     }
4440     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4441     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4442     k    = 0;
4443     for (i=0; i<am; i++) {
4444       ncols_o = bi[i+1] - bi[i];
4445       ncols_d = ai[i+1] - ai[i];
4446       /* off-diagonal portion of A */
4447       for (jo=0; jo<ncols_o; jo++) {
4448         col = cmap[*bj];
4449         if (col >= cstart) break;
4450         cj[k]   = col; bj++;
4451         ca[k++] = *ba++;
4452       }
4453       /* diagonal portion of A */
4454       for (j=0; j<ncols_d; j++) {
4455         cj[k]   = cstart + *aj++;
4456         ca[k++] = *aa++;
4457       }
4458       /* off-diagonal portion of A */
4459       for (j=jo; j<ncols_o; j++) {
4460         cj[k]   = cmap[*bj++];
4461         ca[k++] = *ba++;
4462       }
4463     }
4464     /* put together the new matrix */
4465     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4466     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4467     /* Since these are PETSc arrays, change flags to free them as necessary. */
4468     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4469     mat->free_a  = PETSC_TRUE;
4470     mat->free_ij = PETSC_TRUE;
4471     mat->nonew   = 0;
4472   } else if (scall == MAT_REUSE_MATRIX) {
4473     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4474     ci = mat->i; cj = mat->j; cam = mat->a;
4475     for (i=0; i<am; i++) {
4476       /* off-diagonal portion of A */
4477       ncols_o = bi[i+1] - bi[i];
4478       for (jo=0; jo<ncols_o; jo++) {
4479         col = cmap[*bj];
4480         if (col >= cstart) break;
4481         *cam++ = *ba++; bj++;
4482       }
4483       /* diagonal portion of A */
4484       ncols_d = ai[i+1] - ai[i];
4485       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4486       /* off-diagonal portion of A */
4487       for (j=jo; j<ncols_o; j++) {
4488         *cam++ = *ba++; bj++;
4489       }
4490     }
4491   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4492   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4493   PetscFunctionReturn(0);
4494 }
4495 
4496 #undef __FUNCT__
4497 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4498 /*@C
4499      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4500 
4501     Not Collective
4502 
4503    Input Parameters:
4504 +    A - the matrix
4505 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4506 -    row, col - index sets of rows and columns to extract (or NULL)
4507 
4508    Output Parameter:
4509 .    A_loc - the local sequential matrix generated
4510 
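   Example usage (a sketch; passing NULL for row and col selects all local rows and the nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
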
4511     Level: developer
4512 
4513 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4514 
4515 @*/
4516 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4517 {
4518   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4519   PetscErrorCode ierr;
4520   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4521   IS             isrowa,iscola;
4522   Mat            *aloc;
4523   PetscBool      match;
4524 
4525   PetscFunctionBegin;
4526   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4527   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4528   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4529   if (!row) {
4530     start = A->rmap->rstart; end = A->rmap->rend;
4531     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4532   } else {
4533     isrowa = *row;
4534   }
4535   if (!col) {
4536     start = A->cmap->rstart;
4537     cmap  = a->garray;
4538     nzA   = a->A->cmap->n;
4539     nzB   = a->B->cmap->n;
4540     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4541     ncols = 0;
4542     for (i=0; i<nzB; i++) {
4543       if (cmap[i] < start) idx[ncols++] = cmap[i];
4544       else break;
4545     }
4546     imark = i;
4547     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4548     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4549     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4550   } else {
4551     iscola = *col;
4552   }
4553   if (scall != MAT_INITIAL_MATRIX) {
4554     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4555     aloc[0] = *A_loc;
4556   }
4557   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4558   *A_loc = aloc[0];
4559   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4560   if (!row) {
4561     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4562   }
4563   if (!col) {
4564     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4565   }
4566   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4567   PetscFunctionReturn(0);
4568 }
4569 
4570 #undef __FUNCT__
4571 #define __FUNCT__ "MatGetBrowsOfAcols"
4572 /*@C
4573     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4574 
4575     Collective on Mat
4576 
4577    Input Parameters:
4578 +    A,B - the matrices in mpiaij format
4579 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4580 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4581 
4582    Output Parameter:
4583 +    rowb, colb - index sets of rows and columns of B to extract
4584 -    B_seq - the sequential matrix generated
4585 
4586     Level: developer
4587 
4588 @*/
4589 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4590 {
4591   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4592   PetscErrorCode ierr;
4593   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4594   IS             isrowb,iscolb;
4595   Mat            *bseq=NULL;
4596 
4597   PetscFunctionBegin;
4598   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4599     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4600   }
4601   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4602 
4603   if (scall == MAT_INITIAL_MATRIX) {
4604     start = A->cmap->rstart;
4605     cmap  = a->garray;
4606     nzA   = a->A->cmap->n;
4607     nzB   = a->B->cmap->n;
4608     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
4609     ncols = 0;
4610     for (i=0; i<nzB; i++) {  /* row < local row index */
4611       if (cmap[i] < start) idx[ncols++] = cmap[i];
4612       else break;
4613     }
4614     imark = i;
4615     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4616     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4617     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4618     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4619   } else {
4620     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4621     isrowb  = *rowb; iscolb = *colb;
4622     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4623     bseq[0] = *B_seq;
4624   }
4625   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4626   *B_seq = bseq[0];
4627   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4628   if (!rowb) {
4629     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4630   } else {
4631     *rowb = isrowb;
4632   }
4633   if (!colb) {
4634     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4635   } else {
4636     *colb = iscolb;
4637   }
4638   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4639   PetscFunctionReturn(0);
4640 }
4641 
4642 #undef __FUNCT__
4643 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4644 /*
4645     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4646     of the OFF-DIAGONAL portion of local A
4647 
4648     Collective on Mat
4649 
4650    Input Parameters:
4651 +    A,B - the matrices in mpiaij format
4652 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4653 
4654    Output Parameter:
4655 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4656 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4657 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4658 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4659 
4660     Level: developer
4661 
4662 */
4663 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4664 {
4665   VecScatter_MPI_General *gen_to,*gen_from;
4666   PetscErrorCode         ierr;
4667   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4668   Mat_SeqAIJ             *b_oth;
4669   VecScatter             ctx =a->Mvctx;
4670   MPI_Comm               comm;
4671   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4672   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4673   PetscScalar            *rvalues,*svalues;
4674   MatScalar              *b_otha,*bufa,*bufA;
4675   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4676   MPI_Request            *rwaits = NULL,*swaits = NULL;
4677   MPI_Status             *sstatus,rstatus;
4678   PetscMPIInt            jj,size;
4679   PetscInt               *cols,sbs,rbs;
4680   PetscScalar            *vals;
4681 
4682   PetscFunctionBegin;
4683   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4684   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4685   if (size == 1) PetscFunctionReturn(0);
4686 
4687   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4688     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4689   }
4690   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4691   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4692 
4693   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4694   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4695   rvalues  = gen_from->values; /* holds the length of receiving row */
4696   svalues  = gen_to->values;   /* holds the length of sending row */
4697   nrecvs   = gen_from->n;
4698   nsends   = gen_to->n;
4699 
4700   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4701   srow    = gen_to->indices;    /* local row index to be sent */
4702   sstarts = gen_to->starts;
4703   sprocs  = gen_to->procs;
4704   sstatus = gen_to->sstatus;
4705   sbs     = gen_to->bs;
4706   rstarts = gen_from->starts;
4707   rprocs  = gen_from->procs;
4708   rbs     = gen_from->bs;
4709 
4710   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4711   if (scall == MAT_INITIAL_MATRIX) {
4712     /* i-array */
4713     /*---------*/
4714     /*  post receives */
4715     for (i=0; i<nrecvs; i++) {
4716       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4717       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4718       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4719     }
4720 
4721     /* pack the outgoing message */
4722     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4723 
4724     sstartsj[0] = 0;
4725     rstartsj[0] = 0;
4726     len         = 0; /* total length of j or a array to be sent */
4727     k           = 0;
4728     for (i=0; i<nsends; i++) {
4729       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4730       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4731       for (j=0; j<nrows; j++) {
4732         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4733         for (l=0; l<sbs; l++) {
4734           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4735 
4736           rowlen[j*sbs+l] = ncols;
4737 
4738           len += ncols;
4739           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4740         }
4741         k++;
4742       }
4743       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4744 
4745       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4746     }
4747     /* recvs and sends of i-array are completed */
4748     i = nrecvs;
4749     while (i--) {
4750       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4751     }
4752     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4753 
4754     /* allocate buffers for sending j and a arrays */
4755     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
4756     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
4757 
4758     /* create i-array of B_oth */
4759     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
4760 
4761     b_othi[0] = 0;
4762     len       = 0; /* total length of j or a array to be received */
4763     k         = 0;
4764     for (i=0; i<nrecvs; i++) {
4765       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4766       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4767       for (j=0; j<nrows; j++) {
4768         b_othi[k+1] = b_othi[k] + rowlen[j];
4769         len        += rowlen[j]; k++;
4770       }
4771       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4772     }
4773 
4774     /* allocate space for j and a arrays of B_oth */
4775     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
4776     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
4777 
4778     /* j-array */
4779     /*---------*/
4780     /*  post receives of j-array */
4781     for (i=0; i<nrecvs; i++) {
4782       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4783       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4784     }
4785 
4786     /* pack the outgoing message j-array */
4787     k = 0;
4788     for (i=0; i<nsends; i++) {
4789       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4790       bufJ  = bufj+sstartsj[i];
4791       for (j=0; j<nrows; j++) {
4792         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4793         for (ll=0; ll<sbs; ll++) {
4794           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4795           for (l=0; l<ncols; l++) {
4796             *bufJ++ = cols[l];
4797           }
4798           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4799         }
4800       }
4801       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4802     }
4803 
4804     /* recvs and sends of j-array are completed */
4805     i = nrecvs;
4806     while (i--) {
4807       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4808     }
4809     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4810   } else if (scall == MAT_REUSE_MATRIX) {
4811     sstartsj = *startsj_s;
4812     rstartsj = *startsj_r;
4813     bufa     = *bufa_ptr;
4814     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4815     b_otha   = b_oth->a;
4816   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4817 
4818   /* a-array */
4819   /*---------*/
4820   /*  post receives of a-array */
4821   for (i=0; i<nrecvs; i++) {
4822     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4823     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4824   }
4825 
4826   /* pack the outgoing message a-array */
4827   k = 0;
4828   for (i=0; i<nsends; i++) {
4829     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4830     bufA  = bufa+sstartsj[i];
4831     for (j=0; j<nrows; j++) {
4832       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4833       for (ll=0; ll<sbs; ll++) {
4834         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4835         for (l=0; l<ncols; l++) {
4836           *bufA++ = vals[l];
4837         }
4838         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4839       }
4840     }
4841     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4842   }
4843   /* recvs and sends of a-array are completed */
4844   i = nrecvs;
4845   while (i--) {
4846     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4847   }
4848   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4849   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4850 
4851   if (scall == MAT_INITIAL_MATRIX) {
4852     /* put together the new matrix */
4853     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4854 
4855     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4856     /* Since these are PETSc arrays, change flags to free them as necessary. */
4857     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4858     b_oth->free_a  = PETSC_TRUE;
4859     b_oth->free_ij = PETSC_TRUE;
4860     b_oth->nonew   = 0;
4861 
4862     ierr = PetscFree(bufj);CHKERRQ(ierr);
4863     if (!startsj_s || !bufa_ptr) {
4864       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4865       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4866     } else {
4867       *startsj_s = sstartsj;
4868       *startsj_r = rstartsj;
4869       *bufa_ptr  = bufa;
4870     }
4871   }
4872   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4873   PetscFunctionReturn(0);
4874 }
4875 
4876 #undef __FUNCT__
4877 #define __FUNCT__ "MatGetCommunicationStructs"
4878 /*@C
4879   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4880 
4881   Not Collective
4882 
4883   Input Parameter:
4884 . A - The matrix in mpiaij format
4885 
4886   Output Parameters:
4887 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4888 . colmap - A map from global column index to local index into lvec
4889 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4890 
4891   Level: developer
4892 
4893 @*/
4894 #if defined(PETSC_USE_CTABLE)
4895 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4896 #else
4897 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4898 #endif
4899 {
4900   Mat_MPIAIJ *a;
4901 
4902   PetscFunctionBegin;
4903   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4904   PetscValidPointer(lvec, 2);
4905   PetscValidPointer(colmap, 3);
4906   PetscValidPointer(multScatter, 4);
4907   a = (Mat_MPIAIJ*) A->data;
4908   if (lvec) *lvec = a->lvec;
4909   if (colmap) *colmap = a->colmap;
4910   if (multScatter) *multScatter = a->Mvctx;
4911   PetscFunctionReturn(0);
4912 }
4913 
4914 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4915 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4916 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4917 #if defined(PETSC_HAVE_ELEMENTAL)
4918 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4919 #endif
4920 
4921 #undef __FUNCT__
4922 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4923 /*
4924     Computes (B'*A')' since computing B*A directly is untenable
4925 
4926                n                       p                          p
4927         (              )       (              )         (                  )
4928       m (      A       )  *  n (       B      )   =   m (         C        )
4929         (              )       (              )         (                  )
4930 
4931 */
4932 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4933 {
4934   PetscErrorCode ierr;
4935   Mat            At,Bt,Ct;
4936 
4937   PetscFunctionBegin;
4938   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4939   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4940   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4941   ierr = MatDestroy(&At);CHKERRQ(ierr);
4942   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4943   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4944   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4945   PetscFunctionReturn(0);
4946 }
4947 
4948 #undef __FUNCT__
4949 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4950 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4951 {
4952   PetscErrorCode ierr;
4953   PetscInt       m=A->rmap->n,n=B->cmap->n;
4954   Mat            Cmat;
4955 
4956   PetscFunctionBegin;
4957   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4958   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4959   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4960   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4961   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4962   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4963   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4964   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4965 
4966   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4967 
4968   *C = Cmat;
4969   PetscFunctionReturn(0);
4970 }
4971 
4972 /* ----------------------------------------------------------------*/
4973 #undef __FUNCT__
4974 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4975 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4976 {
4977   PetscErrorCode ierr;
4978 
4979   PetscFunctionBegin;
4980   if (scall == MAT_INITIAL_MATRIX) {
4981     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4982     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4983     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4984   }
4985   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4986   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4987   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 /*MC
4992    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4993 
4994    Options Database Keys:
4995 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4996 
4997   Level: beginner
4998 
4999 .seealso: MatCreateAIJ()
5000 M*/
5001 
5002 #undef __FUNCT__
5003 #define __FUNCT__ "MatCreate_MPIAIJ"
5004 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5005 {
5006   Mat_MPIAIJ     *b;
5007   PetscErrorCode ierr;
5008   PetscMPIInt    size;
5009 
5010   PetscFunctionBegin;
5011   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5012 
5013   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5014   B->data       = (void*)b;
5015   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5016   B->assembled  = PETSC_FALSE;
5017   B->insertmode = NOT_SET_VALUES;
5018   b->size       = size;
5019 
5020   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5021 
5022   /* build cache for off array entries formed */
5023   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5024 
5025   b->donotstash  = PETSC_FALSE;
5026   b->colmap      = 0;
5027   b->garray      = 0;
5028   b->roworiented = PETSC_TRUE;
5029 
5030   /* stuff used for matrix vector multiply */
5031   b->lvec  = NULL;
5032   b->Mvctx = NULL;
5033 
5034   /* stuff for MatGetRow() */
5035   b->rowindices   = 0;
5036   b->rowvalues    = 0;
5037   b->getrowactive = PETSC_FALSE;
5038 
5039   /* flexible pointer used in CUSP/CUSPARSE classes */
5040   b->spptr = NULL;
5041 
5042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5052 #if defined(PETSC_HAVE_ELEMENTAL)
5053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5054 #endif
5055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5058   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5059   PetscFunctionReturn(0);
5060 }
5061 
5062 #undef __FUNCT__
5063 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5064 /*@C
5065      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5066          and "off-diagonal" part of the matrix in CSR format.
5067 
5068    Collective on MPI_Comm
5069 
5070    Input Parameters:
5071 +  comm - MPI communicator
5072 .  m - number of local rows (Cannot be PETSC_DECIDE)
5073 .  n - This value should be the same as the local size used in creating the
5074        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5075        it calculated if N is given). For square matrices n is almost always m.
5076 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5077 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5078 .   i - row indices for "diagonal" portion of matrix
5079 .   j - column indices
5080 .   a - matrix values
5081 .   oi - row indices for "off-diagonal" portion of matrix
5082 .   oj - column indices
5083 -   oa - matrix values
5084 
5085    Output Parameter:
5086 .   mat - the matrix
5087 
5088    Level: advanced
5089 
5090    Notes:
5091        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5092        must free the arrays once the matrix has been destroyed and not before.
5093 
5094        The i and j indices are 0 based
5095 
5096        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5097 
5098        This sets local rows and cannot be used to set off-processor values.
5099 
5100        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5101        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5102        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5103        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5104        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5105        communication if it is known that only local entries will be set.
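
       A minimal sketch of the recommended MatSetValues()-based assembly (the preallocation counts
       and the way nc, cols, and vals are filled for each row are application-dependent placeholders):

.vb
      Mat         A;
      PetscInt    rstart,rend,r,nc,cols[16];
      PetscScalar vals[16];

      MatCreate(comm,&A);
      MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,3,NULL);
      MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);
      MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);   /* only if no off-process entries are set */
      MatGetOwnershipRange(A,&rstart,&rend);
      for (r=rstart; r<rend; r++) {
        /* fill nc, cols[], and vals[] for row r from the application data structure */
        MatSetValues(A,1,&r,nc,cols,vals,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve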
5106 
5107 .keywords: matrix, aij, compressed row, sparse, parallel
5108 
5109 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5110           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5111 @*/
5112 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5113 {
5114   PetscErrorCode ierr;
5115   Mat_MPIAIJ     *maij;
5116 
5117   PetscFunctionBegin;
5118   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5119   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5120   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5121   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5122   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5123   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5124   maij = (Mat_MPIAIJ*) (*mat)->data;
5125 
5126   (*mat)->preallocated = PETSC_TRUE;
5127 
5128   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5129   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5130 
5131   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5132   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5133 
5134   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5135   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5136   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5137   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5138 
5139   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5140   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5141   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5142   PetscFunctionReturn(0);
5143 }
5144 
5145 /*
5146     Special version for direct calls from Fortran
5147 */
5148 #include <petsc-private/fortranimpl.h>
5149 
5150 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5151 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5152 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5153 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5154 #endif
5155 
5156 /* Change these macros so they can be used in a void function */
5157 #undef CHKERRQ
5158 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5159 #undef SETERRQ2
5160 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5161 #undef SETERRQ3
5162 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5163 #undef SETERRQ
5164 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5165 
5166 #undef __FUNCT__
5167 #define __FUNCT__ "matsetvaluesmpiaij_"
5168 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5169 {
5170   Mat            mat  = *mmat;
5171   PetscInt       m    = *mm, n = *mn;
5172   InsertMode     addv = *maddv;
5173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5174   PetscScalar    value;
5175   PetscErrorCode ierr;
5176 
5177   MatCheckPreallocated(mat,1);
5178   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5179 
5180 #if defined(PETSC_USE_DEBUG)
5181   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5182 #endif
5183   {
5184     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5185     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5186     PetscBool roworiented = aij->roworiented;
5187 
5188     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5189     Mat        A                 = aij->A;
5190     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5191     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5192     MatScalar  *aa               = a->a;
5193     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5194     Mat        B                 = aij->B;
5195     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5196     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5197     MatScalar  *ba               = b->a;
5198 
5199     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5200     PetscInt  nonew = a->nonew;
5201     MatScalar *ap1,*ap2;
5202 
5203     PetscFunctionBegin;
5204     for (i=0; i<m; i++) {
5205       if (im[i] < 0) continue;
5206 #if defined(PETSC_USE_DEBUG)
5207       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5208 #endif
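      /* rows owned by this process are inserted directly into the local diagonal (A) or off-diagonal (B) block; rows owned elsewhere are buffered in the stash below unless stashing is disabled */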
5209       if (im[i] >= rstart && im[i] < rend) {
5210         row      = im[i] - rstart;
5211         lastcol1 = -1;
5212         rp1      = aj + ai[row];
5213         ap1      = aa + ai[row];
5214         rmax1    = aimax[row];
5215         nrow1    = ailen[row];
5216         low1     = 0;
5217         high1    = nrow1;
5218         lastcol2 = -1;
5219         rp2      = bj + bi[row];
5220         ap2      = ba + bi[row];
5221         rmax2    = bimax[row];
5222         nrow2    = bilen[row];
5223         low2     = 0;
5224         high2    = nrow2;
5225 
5226         for (j=0; j<n; j++) {
5227           if (roworiented) value = v[i*n+j];
5228           else value = v[i+j*m];
5229           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5230           if (in[j] >= cstart && in[j] < cend) {
5231             col = in[j] - cstart;
5232             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5233           } else if (in[j] < 0) continue;
5234 #if defined(PETSC_USE_DEBUG)
5235           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5236 #endif
5237           else {
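            /* column lies outside the diagonal block: if the matrix was assembled before, translate the global column through aij->colmap (disassembling B when a new nonzero location must be added); otherwise B still addresses columns by their global indices */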
5238             if (mat->was_assembled) {
5239               if (!aij->colmap) {
5240                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5241               }
5242 #if defined(PETSC_USE_CTABLE)
5243               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5244               col--;
5245 #else
5246               col = aij->colmap[in[j]] - 1;
5247 #endif
5248               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5249                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5250                 col  =  in[j];
5251                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5252                 B     = aij->B;
5253                 b     = (Mat_SeqAIJ*)B->data;
5254                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5255                 ba    = b->a; /* refresh ba before it is used below: MatDisAssemble_MPIAIJ() replaced aij->B, so the old array is gone */
5256                 rp2   = bj + bi[row];
5257                 ap2   = ba + bi[row];
5258                 rmax2 = bimax[row];
5259                 nrow2 = bilen[row];
5260                 low2  = 0;
5261                 high2 = nrow2;
5262                 bm    = aij->B->rmap->n;
5263               }
5264             } else col = in[j];
5265             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5266           }
5267         }
5268       } else if (!aij->donotstash) {
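        /* row is owned by another process: stash the values; they are communicated to the owning process in MatAssemblyBegin()/MatAssemblyEnd() */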
5269         if (roworiented) {
5270           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5271         } else {
5272           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5273         }
5274       }
5275     }
5276   }
5277   PetscFunctionReturnVoid();
5278 }
5279 
5280