xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8a84db2dfe7416430d07f30490d8cd16f3a5bd12)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity; the one that does not match the matrix type is ignored.
16 
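  Example Usage (a minimal sketch; comm, M, and N are placeholders and the preallocation
  counts are illustrative only):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);         /* takes effect on a single-process communicator */
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);  /* takes effect on a multi-process communicator  */
.ve
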
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
62 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
63 {
64   PetscErrorCode  ierr;
65   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
66   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
67   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
68   const PetscInt  *ia,*ib;
69   const MatScalar *aa,*bb;
70   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
71   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
72 
73   PetscFunctionBegin;
74   *keptrows = 0;
75   ia        = a->i;
76   ib        = b->i;
77   for (i=0; i<m; i++) {
78     na = ia[i+1] - ia[i];
79     nb = ib[i+1] - ib[i];
80     if (!na && !nb) {
81       cnt++;
82       goto ok1;
83     }
84     aa = a->a + ia[i];
85     for (j=0; j<na; j++) {
86       if (aa[j] != 0.0) goto ok1;
87     }
88     bb = b->a + ib[i];
89     for (j=0; j <nb; j++) {
90       if (bb[j] != 0.0) goto ok1;
91     }
92     cnt++;
93 ok1:;
94   }
95   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
96   if (!n0rows) PetscFunctionReturn(0);
97   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
98   cnt  = 0;
99   for (i=0; i<m; i++) {
100     na = ia[i+1] - ia[i];
101     nb = ib[i+1] - ib[i];
102     if (!na && !nb) continue;
103     aa = a->a + ia[i];
104     for (j=0; j<na;j++) {
105       if (aa[j] != 0.0) {
106         rows[cnt++] = rstart + i;
107         goto ok2;
108       }
109     }
110     bb = b->a + ib[i];
111     for (j=0; j<nb; j++) {
112       if (bb[j] != 0.0) {
113         rows[cnt++] = rstart + i;
114         goto ok2;
115       }
116     }
117 ok2:;
118   }
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
125 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
126 {
127   PetscErrorCode    ierr;
128   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
129 
130   PetscFunctionBegin;
131   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
132     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
133   } else {
134     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
135   }
136   PetscFunctionReturn(0);
137 }
138 
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
142 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
143 {
144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
145   PetscErrorCode ierr;
146   PetscInt       i,rstart,nrows,*rows;
147 
148   PetscFunctionBegin;
149   *zrows = NULL;
150   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
151   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
152   for (i=0; i<nrows; i++) rows[i] += rstart;
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
159 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
160 {
161   PetscErrorCode ierr;
162   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
163   PetscInt       i,n,*garray = aij->garray;
164   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
165   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
166   PetscReal      *work;
167 
168   PetscFunctionBegin;
169   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
170   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
171   if (type == NORM_2) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
177     }
178   } else if (type == NORM_1) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
184     }
185   } else if (type == NORM_INFINITY) {
186     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
187       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
188     }
189     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
190       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
191     }
192 
193   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
194   if (type == NORM_INFINITY) {
195     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
196   } else {
197     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
198   }
199   ierr = PetscFree(work);CHKERRQ(ierr);
200   if (type == NORM_2) {
201     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNCT__
207 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
208 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
209 {
210   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
211   IS              sis,gis;
212   PetscErrorCode  ierr;
213   const PetscInt  *isis,*igis;
214   PetscInt        n,*iis,nsis,ngis,rstart,i;
215 
216   PetscFunctionBegin;
217   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
218   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
219   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
220   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
221   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
223 
224   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
225   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
226   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
227   n    = ngis + nsis;
228   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
230   for (i=0; i<n; i++) iis[i] += rstart;
231   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
232 
233   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
234   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
235   ierr = ISDestroy(&sis);CHKERRQ(ierr);
236   ierr = ISDestroy(&gis);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatDistribute_MPIAIJ"
242 /*
243     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
244     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
245 
246     Only for square matrices
247 
248     Used by a preconditioner, hence PETSC_EXTERN
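
    A minimal calling sketch (assuming gmat is an assembled MATSEQAIJ that is significant on
    rank 0, and m is this rank's number of local rows, chosen consistently across comm):

       Mat dmat;
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);
       ... later, after only the numerical values of gmat change on rank 0 ...
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);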
249 */
250 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
251 {
252   PetscMPIInt    rank,size;
253   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
254   PetscErrorCode ierr;
255   Mat            mat;
256   Mat_SeqAIJ     *gmata;
257   PetscMPIInt    tag;
258   MPI_Status     status;
259   PetscBool      aij;
260   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
261 
262   PetscFunctionBegin;
263   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
264   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
265   if (!rank) {
266     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
267     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
268   }
269   if (reuse == MAT_INITIAL_MATRIX) {
270     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
271     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
272     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
273     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
274     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
275     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
276     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
277     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
278     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
279 
280     rowners[0] = 0;
281     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
282     rstart = rowners[rank];
283     rend   = rowners[rank+1];
284     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
285     if (!rank) {
286       gmata = (Mat_SeqAIJ*) gmat->data;
287       /* send row lengths to all processors */
288       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
289       for (i=1; i<size; i++) {
290         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
291       }
292       /* determine the number of diagonal and off-diagonal entries in each row */
293       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
294       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
295       jj   = 0;
296       for (i=0; i<m; i++) {
297         for (j=0; j<dlens[i]; j++) {
298           if (gmata->j[jj] < rstart) ld[i]++;
299           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
300           jj++;
301         }
302       }
303       /* send column indices to other processes */
304       for (i=1; i<size; i++) {
305         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
306         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
307         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
308       }
309 
310       /* send numerical values to other processes */
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       gmataa = gmata->a;
316       gmataj = gmata->j;
317 
318     } else {
319       /* receive row lengths */
320       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
321       /* receive column indices */
322       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
323       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
325       /* determine the number of diagonal and off-diagonal entries in each row */
326       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
327       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
328       jj   = 0;
329       for (i=0; i<m; i++) {
330         for (j=0; j<dlens[i]; j++) {
331           if (gmataj[jj] < rstart) ld[i]++;
332           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
333           jj++;
334         }
335       }
336       /* receive numerical values */
337       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
339     }
340     /* set preallocation */
341     for (i=0; i<m; i++) {
342       dlens[i] -= olens[i];
343     }
344     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
345     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
346 
347     for (i=0; i<m; i++) {
348       dlens[i] += olens[i];
349     }
350     cnt = 0;
351     for (i=0; i<m; i++) {
352       row  = rstart + i;
353       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
354       cnt += dlens[i];
355     }
356     if (rank) {
357       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
358     }
359     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
360     ierr = PetscFree(rowners);CHKERRQ(ierr);
361 
362     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
363 
364     *inmat = mat;
365   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
366     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
367     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
368     mat  = *inmat;
369     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
370     if (!rank) {
371       /* send numerical values to other processes */
372       gmata  = (Mat_SeqAIJ*) gmat->data;
373       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
374       gmataa = gmata->a;
375       for (i=1; i<size; i++) {
376         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
377         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
378       }
379       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
380     } else {
381       /* receive numerical values from process 0 */
382       nz   = Ad->nz + Ao->nz;
383       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
384       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
385     }
386     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
387     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
388     ad = Ad->a;
389     ao = Ao->a;
390     if (mat->rmap->n) {
391       i  = 0;
392       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
393       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
394     }
395     for (i=1; i<mat->rmap->n; i++) {
396       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
397       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
398     }
399     i--;
400     if (mat->rmap->n) {
401       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (rank) {
404       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
405     }
406   }
407   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
408   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 /*
413   Local utility routine that creates a mapping from the global column
414 number to the local number in the off-diagonal part of the local
415 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
416 a slightly higher hash-table lookup cost; without it, it is not scalable (each
417 process stores an integer array of length equal to the global number of columns), but access is fast.
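
  For example, a later lookup of a global column gcol (as done in MatSetValues_MPIAIJ() and
  MatGetValues_MPIAIJ() below) looks roughly like

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

  where a negative lcol indicates that gcol is not present in the off-diagonal part.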
418 */
419 #undef __FUNCT__
420 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
421 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
422 {
423   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
424   PetscErrorCode ierr;
425   PetscInt       n = aij->B->cmap->n,i;
426 
427   PetscFunctionBegin;
428   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
429 #if defined(PETSC_USE_CTABLE)
430   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
431   for (i=0; i<n; i++) {
432     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
433   }
434 #else
435   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
436   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
437   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
438 #endif
439   PetscFunctionReturn(0);
440 }
441 
442 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
443 { \
444     if (col <= lastcol1)  low1 = 0;     \
445     else                 high1 = nrow1; \
446     lastcol1 = col;\
447     while (high1-low1 > 5) { \
448       t = (low1+high1)/2; \
449       if (rp1[t] > col) high1 = t; \
450       else              low1  = t; \
451     } \
452       for (_i=low1; _i<high1; _i++) { \
453         if (rp1[_i] > col) break; \
454         if (rp1[_i] == col) { \
455           if (addv == ADD_VALUES) ap1[_i] += value;   \
456           else                    ap1[_i] = value; \
457           goto a_noinsert; \
458         } \
459       }  \
460       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
461       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
462       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
463       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
464       N = nrow1++ - 1; a->nz++; high1++; \
465       /* shift up all the later entries in this row */ \
466       for (ii=N; ii>=_i; ii--) { \
467         rp1[ii+1] = rp1[ii]; \
468         ap1[ii+1] = ap1[ii]; \
469       } \
470       rp1[_i] = col;  \
471       ap1[_i] = value;  \
472       A->nonzerostate++;\
473       a_noinsert: ; \
474       ailen[row] = nrow1; \
475 }
476 
477 
478 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
479   { \
480     if (col <= lastcol2) low2 = 0;                        \
481     else high2 = nrow2;                                   \
482     lastcol2 = col;                                       \
483     while (high2-low2 > 5) {                              \
484       t = (low2+high2)/2;                                 \
485       if (rp2[t] > col) high2 = t;                        \
486       else             low2  = t;                         \
487     }                                                     \
488     for (_i=low2; _i<high2; _i++) {                       \
489       if (rp2[_i] > col) break;                           \
490       if (rp2[_i] == col) {                               \
491         if (addv == ADD_VALUES) ap2[_i] += value;         \
492         else                    ap2[_i] = value;          \
493         goto b_noinsert;                                  \
494       }                                                   \
495     }                                                     \
496     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
497     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
498     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
499     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
500     N = nrow2++ - 1; b->nz++; high2++;                    \
501     /* shift up all the later entries in this row */      \
502     for (ii=N; ii>=_i; ii--) {                            \
503       rp2[ii+1] = rp2[ii];                                \
504       ap2[ii+1] = ap2[ii];                                \
505     }                                                     \
506     rp2[_i] = col;                                        \
507     ap2[_i] = value;                                      \
508     B->nonzerostate++;                                    \
509     b_noinsert: ;                                         \
510     bilen[row] = nrow2;                                   \
511   }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
515 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
516 {
517   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
518   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
519   PetscErrorCode ierr;
520   PetscInt       l,*garray = mat->garray,diag;
521 
522   PetscFunctionBegin;
523   /* code only works for square matrices A */
524 
525   /* find size of row to the left of the diagonal part */
526   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
527   row  = row - diag;
528   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
529     if (garray[b->j[b->i[row]+l]] > diag) break;
530   }
531   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
532 
533   /* diagonal part */
534   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
535 
536   /* right of diagonal part */
537   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatSetValues_MPIAIJ"
543 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
544 {
545   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
546   PetscScalar    value;
547   PetscErrorCode ierr;
548   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
549   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
550   PetscBool      roworiented = aij->roworiented;
551 
552   /* Some Variables required in the macro */
553   Mat        A                 = aij->A;
554   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
555   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
556   MatScalar  *aa               = a->a;
557   PetscBool  ignorezeroentries = a->ignorezeroentries;
558   Mat        B                 = aij->B;
559   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
560   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
561   MatScalar  *ba               = b->a;
562 
563   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
564   PetscInt  nonew;
565   MatScalar *ap1,*ap2;
566 
567   PetscFunctionBegin;
568   for (i=0; i<m; i++) {
569     if (im[i] < 0) continue;
570 #if defined(PETSC_USE_DEBUG)
571     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
572 #endif
573     if (im[i] >= rstart && im[i] < rend) {
574       row      = im[i] - rstart;
575       lastcol1 = -1;
576       rp1      = aj + ai[row];
577       ap1      = aa + ai[row];
578       rmax1    = aimax[row];
579       nrow1    = ailen[row];
580       low1     = 0;
581       high1    = nrow1;
582       lastcol2 = -1;
583       rp2      = bj + bi[row];
584       ap2      = ba + bi[row];
585       rmax2    = bimax[row];
586       nrow2    = bilen[row];
587       low2     = 0;
588       high2    = nrow2;
589 
590       for (j=0; j<n; j++) {
591         if (roworiented) value = v[i*n+j];
592         else             value = v[i+j*m];
593         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
594         if (in[j] >= cstart && in[j] < cend) {
595           col   = in[j] - cstart;
596           nonew = a->nonew;
597           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
598         } else if (in[j] < 0) continue;
599 #if defined(PETSC_USE_DEBUG)
600         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
601 #endif
602         else {
603           if (mat->was_assembled) {
604             if (!aij->colmap) {
605               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606             }
607 #if defined(PETSC_USE_CTABLE)
608             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
609             col--;
610 #else
611             col = aij->colmap[in[j]] - 1;
612 #endif
613             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
614               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
615               col  =  in[j];
616               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
617               B     = aij->B;
618               b     = (Mat_SeqAIJ*)B->data;
619               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
620               rp2   = bj + bi[row];
621               ap2   = ba + bi[row];
622               rmax2 = bimax[row];
623               nrow2 = bilen[row];
624               low2  = 0;
625               high2 = nrow2;
626               bm    = aij->B->rmap->n;
627               ba    = b->a;
628             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
629           } else col = in[j];
630           nonew = b->nonew;
631           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
632         }
633       }
634     } else {
635       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
636       if (!aij->donotstash) {
637         mat->assembled = PETSC_FALSE;
638         if (roworiented) {
639           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
640         } else {
641           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
642         }
643       }
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatGetValues_MPIAIJ"
651 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   PetscErrorCode ierr;
655   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
656   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
657 
658   PetscFunctionBegin;
659   for (i=0; i<m; i++) {
660     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
661     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
662     if (idxm[i] >= rstart && idxm[i] < rend) {
663       row = idxm[i] - rstart;
664       for (j=0; j<n; j++) {
665         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
666         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
667         if (idxn[j] >= cstart && idxn[j] < cend) {
668           col  = idxn[j] - cstart;
669           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
670         } else {
671           if (!aij->colmap) {
672             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
673           }
674 #if defined(PETSC_USE_CTABLE)
675           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
676           col--;
677 #else
678           col = aij->colmap[idxn[j]] - 1;
679 #endif
680           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
681           else {
682             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
683           }
684         }
685       }
686     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
695 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
696 {
697   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
698   PetscErrorCode ierr;
699   PetscInt       nstash,reallocs;
700 
701   PetscFunctionBegin;
702   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
703 
704   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
705   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
706   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
712 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
713 {
714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
715   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
716   PetscErrorCode ierr;
717   PetscMPIInt    n;
718   PetscInt       i,j,rstart,ncols,flg;
719   PetscInt       *row,*col;
720   PetscBool      other_disassembled;
721   PetscScalar    *val;
722 
723   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
724 
725   PetscFunctionBegin;
726   if (!aij->donotstash && !mat->nooffprocentries) {
727     while (1) {
728       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
729       if (!flg) break;
730 
731       for (i=0; i<n; ) {
732         /* Now identify the consecutive vals belonging to the same row */
733         for (j=i,rstart=row[j]; j<n; j++) {
734           if (row[j] != rstart) break;
735         }
736         if (j < n) ncols = j-i;
737         else       ncols = n-i;
738         /* Now assemble all these values with a single function call */
739         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
740 
741         i = j;
742       }
743     }
744     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
745   }
746   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
747   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
748 
749   /* determine if any processor has disassembled; if so we must
750      also disassemble ourselves, in order that we may reassemble. */
751   /*
752      if the nonzero structure of submatrix B cannot change then we know that
753      no processor disassembled, thus we can skip this step
754   */
755   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
756     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
757     if (mat->was_assembled && !other_disassembled) {
758       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
759     }
760   }
761   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
762     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
763   }
764   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
765   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
766   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
767 
768   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
769 
770   aij->rowvalues = 0;
771 
772   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
773   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
774 
775   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
776   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
777     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
778     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
785 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
786 {
787   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
792   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatZeroRows_MPIAIJ"
798 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
799 {
800   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
801   PetscInt      *lrows;
802   PetscInt       r, len;
803   PetscErrorCode ierr;
804 
805   PetscFunctionBegin;
806   /* get locally owned rows */
807   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
808   /* fix right hand side if needed */
809   if (x && b) {
810     const PetscScalar *xx;
811     PetscScalar       *bb;
812 
813     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
814     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
815     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
816     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
817     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
818   }
819   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
820   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
821   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
822     PetscBool cong;
823     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
824     if (cong) A->congruentlayouts = 1;
825     else      A->congruentlayouts = 0;
826   }
827   if ((diag != 0.0) && A->congruentlayouts) {
828     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
829   } else if (diag != 0.0) {
830     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
831     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
832     for (r = 0; r < len; ++r) {
833       const PetscInt row = lrows[r] + A->rmap->rstart;
834       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
835     }
836     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
837     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   } else {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840   }
841   ierr = PetscFree(lrows);CHKERRQ(ierr);
842 
843   /* only change matrix nonzero state if pattern was allowed to be changed */
844   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
845     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
853 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
854 {
855   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
856   PetscErrorCode    ierr;
857   PetscMPIInt       n = A->rmap->n;
858   PetscInt          i,j,r,m,p = 0,len = 0;
859   PetscInt          *lrows,*owners = A->rmap->range;
860   PetscSFNode       *rrows;
861   PetscSF           sf;
862   const PetscScalar *xx;
863   PetscScalar       *bb,*mask;
864   Vec               xmask,lmask;
865   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
866   const PetscInt    *aj, *ii,*ridx;
867   PetscScalar       *aa;
868 
869   PetscFunctionBegin;
870   /* Create SF where leaves are input rows and roots are owned rows */
871   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
872   for (r = 0; r < n; ++r) lrows[r] = -1;
873   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
874   for (r = 0; r < N; ++r) {
875     const PetscInt idx   = rows[r];
876     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
877     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
878       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
879     }
880     rrows[r].rank  = p;
881     rrows[r].index = rows[r] - owners[p];
882   }
883   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
884   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
885   /* Collect flags for rows to be zeroed */
886   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
887   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
888   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
889   /* Compress and put in row numbers */
890   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
891   /* zero diagonal part of matrix */
892   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
893   /* handle off diagonal part of matrix */
894   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
895   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
896   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
897   for (i=0; i<len; i++) bb[lrows[i]] = 1;
898   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
899   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
902   if (x) {
903     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
906     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
907   }
908   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
909   /* remove zeroed rows of off diagonal matrix */
910   ii = aij->i;
911   for (i=0; i<len; i++) {
912     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
913   }
914   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
915   if (aij->compressedrow.use) {
916     m    = aij->compressedrow.nrows;
917     ii   = aij->compressedrow.i;
918     ridx = aij->compressedrow.rindex;
919     for (i=0; i<m; i++) {
920       n  = ii[i+1] - ii[i];
921       aj = aij->j + ii[i];
922       aa = aij->a + ii[i];
923 
924       for (j=0; j<n; j++) {
925         if (PetscAbsScalar(mask[*aj])) {
926           if (b) bb[*ridx] -= *aa*xx[*aj];
927           *aa = 0.0;
928         }
929         aa++;
930         aj++;
931       }
932       ridx++;
933     }
934   } else { /* do not use compressed row format */
935     m = l->B->rmap->n;
936     for (i=0; i<m; i++) {
937       n  = ii[i+1] - ii[i];
938       aj = aij->j + ii[i];
939       aa = aij->a + ii[i];
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[i] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948     }
949   }
950   if (x) {
951     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
952     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
953   }
954   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
955   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
956   ierr = PetscFree(lrows);CHKERRQ(ierr);
957 
958   /* only change matrix nonzero state if pattern was allowed to be changed */
959   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
960     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
961     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMult_MPIAIJ"
968 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
969 {
970   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
971   PetscErrorCode ierr;
972   PetscInt       nt;
973 
974   PetscFunctionBegin;
975   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
976   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
977   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
979   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
986 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultAdd_MPIAIJ"
998 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002 
1003   PetscFunctionBegin;
1004   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1013 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1016   PetscErrorCode ierr;
1017   PetscBool      merged;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1021   /* do nondiagonal part */
1022   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1023   if (!merged) {
1024     /* send it on its way */
1025     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1026     /* do local part */
1027     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1028     /* receive remote parts: note this assumes the values are not actually */
1029     /* added into yy until the VecScatterEnd() below */
1030     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1031   } else {
1032     /* do local part */
1033     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1034     /* send it on its way */
1035     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1036     /* values actually were received in the Begin() but we need to call this nop */
1037     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1044 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1045 {
1046   MPI_Comm       comm;
1047   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1048   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1049   IS             Me,Notme;
1050   PetscErrorCode ierr;
1051   PetscInt       M,N,first,last,*notme,i;
1052   PetscMPIInt    size;
1053 
1054   PetscFunctionBegin;
1055   /* Easy test: symmetric diagonal block */
1056   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1057   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1058   if (!*f) PetscFunctionReturn(0);
1059   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1061   if (size == 1) PetscFunctionReturn(0);
1062 
1063   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1064   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1065   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1066   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1067   for (i=0; i<first; i++) notme[i] = i;
1068   for (i=last; i<M; i++) notme[i-last+first] = i;
1069   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1070   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1071   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1072   Aoff = Aoffs[0];
1073   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1074   Boff = Boffs[0];
1075   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1076   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1077   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1078   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1079   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1080   ierr = PetscFree(notme);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1086 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089   PetscErrorCode ierr;
1090 
1091   PetscFunctionBegin;
1092   /* do nondiagonal part */
1093   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1094   /* send it on its way */
1095   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1096   /* do local part */
1097   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1098   /* receive remote parts */
1099   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 /*
1104   This only works correctly for square matrices where the subblock A->A is the
1105    diagonal block
1106 */
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1109 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1110 {
1111   PetscErrorCode ierr;
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113 
1114   PetscFunctionBegin;
1115   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1116   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1117   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatScale_MPIAIJ"
1123 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1124 {
1125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1130   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatDestroy_MPIAIJ"
1136 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1137 {
1138   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142 #if defined(PETSC_USE_LOG)
1143   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1144 #endif
1145   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1146   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1147   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1148   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1149 #if defined(PETSC_USE_CTABLE)
1150   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1151 #else
1152   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1153 #endif
1154   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1156   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1157   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1158   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1159   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1160 
1161   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1169 #if defined(PETSC_HAVE_ELEMENTAL)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1171 #endif
1172 #if defined(PETSC_HAVE_HYPRE)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1175 #endif
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1181 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1182 {
1183   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1184   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1185   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1186   PetscErrorCode ierr;
1187   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1188   int            fd;
1189   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1190   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1191   PetscScalar    *column_values;
1192   PetscInt       message_count,flowcontrolcount;
1193   FILE           *file;
1194 
1195   PetscFunctionBegin;
1196   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1197   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1198   nz   = A->nz + B->nz;
1199   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1200   if (!rank) {
1201     header[0] = MAT_FILE_CLASSID;
1202     header[1] = mat->rmap->N;
1203     header[2] = mat->cmap->N;
1204 
1205     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1206     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1207     /* get largest number of rows any processor has */
1208     rlen  = mat->rmap->n;
1209     range = mat->rmap->range;
1210     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1211   } else {
1212     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1213     rlen = mat->rmap->n;
1214   }
1215 
1216   /* load up the local row counts */
1217   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1218   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1219 
1220   /* store the row lengths to the file */
1221   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1222   if (!rank) {
1223     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1224     for (i=1; i<size; i++) {
1225       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1226       rlen = range[i+1] - range[i];
1227       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1229     }
1230     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1231   } else {
1232     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1233     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1235   }
1236   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1237 
1238   /* load up the local column indices */
1239   nzmax = nz; /* this process needs as much space as the largest process needs */
1240   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1242   cnt   = 0;
1243   for (i=0; i<mat->rmap->n; i++) {
1244     for (j=B->i[i]; j<B->i[i+1]; j++) {
1245       if ((col = garray[B->j[j]]) > cstart) break;
1246       column_indices[cnt++] = col;
1247     }
1248     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1249     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1250   }
1251   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1252 
1253   /* store the column indices to the file */
1254   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1255   if (!rank) {
1256     MPI_Status status;
1257     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1258     for (i=1; i<size; i++) {
1259       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1260       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1261       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1262       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     }
1265     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1266   } else {
1267     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1268     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1271   }
1272   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1273 
1274   /* load up the local column values */
1275   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1276   cnt  = 0;
1277   for (i=0; i<mat->rmap->n; i++) {
1278     for (j=B->i[i]; j<B->i[i+1]; j++) {
1279       if (garray[B->j[j]] > cstart) break;
1280       column_values[cnt++] = B->a[j];
1281     }
1282     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1283     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1284   }
1285   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1286 
1287   /* store the column values to the file */
1288   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1289   if (!rank) {
1290     MPI_Status status;
1291     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1292     for (i=1; i<size; i++) {
1293       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1294       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1295       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1296       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1297       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     }
1299     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1300   } else {
1301     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1302     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1304     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1305   }
1306   ierr = PetscFree(column_values);CHKERRQ(ierr);
1307 
1308   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1309   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #include <petscdraw.h>
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1316 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1317 {
1318   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1319   PetscErrorCode    ierr;
1320   PetscMPIInt       rank = aij->rank,size = aij->size;
1321   PetscBool         isdraw,iascii,isbinary;
1322   PetscViewer       sviewer;
1323   PetscViewerFormat format;
1324 
1325   PetscFunctionBegin;
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1327   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1328   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1329   if (iascii) {
1330     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1331     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1332       MatInfo   info;
1333       PetscBool inodes;
1334 
1335       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1336       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1337       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1338       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1339       if (!inodes) {
1340         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1341                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1342       } else {
1343         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1344                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1345       }
1346       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1348       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1349       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1350       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1352       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1353       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1354       PetscFunctionReturn(0);
1355     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1356       PetscInt inodecount,inodelimit,*inodes;
1357       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1358       if (inodes) {
1359         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1360       } else {
1361         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1362       }
1363       PetscFunctionReturn(0);
1364     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1365       PetscFunctionReturn(0);
1366     }
1367   } else if (isbinary) {
1368     if (size == 1) {
1369       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1370       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1371     } else {
1372       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1373     }
1374     PetscFunctionReturn(0);
1375   } else if (isdraw) {
1376     PetscDraw draw;
1377     PetscBool isnull;
1378     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1379     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1380     if (isnull) PetscFunctionReturn(0);
1381   }
1382 
1383   {
1384     /* assemble the entire matrix onto first processor. */
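         /* Note: all rows of the temporary matrix A below end up on rank 0, so this path is
            not memory scalable and is only intended for viewing reasonably small matrices. */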
1385     Mat        A;
1386     Mat_SeqAIJ *Aloc;
1387     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1388     MatScalar  *a;
1389 
1390     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1391     if (!rank) {
1392       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1393     } else {
1394       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1395     }
1396     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1397     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1398     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1399     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1400     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1401 
1402     /* copy over the A part */
1403     Aloc = (Mat_SeqAIJ*)aij->A->data;
1404     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1405     row  = mat->rmap->rstart;
1406     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1407     for (i=0; i<m; i++) {
1408       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1409       row++;
1410       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1411     }
1412     aj = Aloc->j;
1413     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1414 
1415     /* copy over the B part */
1416     Aloc = (Mat_SeqAIJ*)aij->B->data;
1417     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1418     row  = mat->rmap->rstart;
1419     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1420     ct   = cols;
1421     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1422     for (i=0; i<m; i++) {
1423       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1424       row++;
1425       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1426     }
1427     ierr = PetscFree(ct);CHKERRQ(ierr);
1428     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1430     /*
1431        Every process has to make this call to draw the matrix since the graphics waits are
1432        synchronized across all processors that share the PetscDraw object
1433     */
1434     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1435     if (!rank) {
1436       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1437       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1438     }
1439     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1440     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1441     ierr = MatDestroy(&A);CHKERRQ(ierr);
1442   }
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatView_MPIAIJ"
1448 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1449 {
1450   PetscErrorCode ierr;
1451   PetscBool      iascii,isdraw,issocket,isbinary;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1458   if (iascii || isdraw || isbinary || issocket) {
1459     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1460   }
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatSOR_MPIAIJ"
1466 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1467 {
1468   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1469   PetscErrorCode ierr;
1470   Vec            bb1 = 0;
1471   PetscBool      hasop;
1472 
1473   PetscFunctionBegin;
1474   if (flag == SOR_APPLY_UPPER) {
1475     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1476     PetscFunctionReturn(0);
1477   }
1478 
1479   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || (flag & SOR_EISENSTAT)) {
1480     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1481   }
1482 
1483   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1484     if (flag & SOR_ZERO_INITIAL_GUESS) {
1485       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1486       its--;
1487     }
1488 
1489     while (its--) {
1490       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492 
1493       /* update rhs: bb1 = bb - B*x */
1494       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1495       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1496 
1497       /* local sweep */
1498       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1499     }
1500   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1501     if (flag & SOR_ZERO_INITIAL_GUESS) {
1502       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1503       its--;
1504     }
1505     while (its--) {
1506       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508 
1509       /* update rhs: bb1 = bb - B*x */
1510       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1511       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1512 
1513       /* local sweep */
1514       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1515     }
1516   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1517     if (flag & SOR_ZERO_INITIAL_GUESS) {
1518       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1519       its--;
1520     }
1521     while (its--) {
1522       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524 
1525       /* update rhs: bb1 = bb - B*x */
1526       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1527       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1528 
1529       /* local sweep */
1530       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1531     }
1532   } else if (flag & SOR_EISENSTAT) {
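         /* Eisenstat's trick (Eisenstat, 1981): the SSOR application is split into a local
            backward sweep and a local forward sweep with the (block) diagonal applied in
            between, which lets SSOR-preconditioned Krylov methods avoid a separate
            multiplication with the matrix. */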
1533     Vec xx1;
1534 
1535     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1537 
1538     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540     if (!mat->diag) {
1541       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1542       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1543     }
1544     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1545     if (hasop) {
1546       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1547     } else {
1548       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1549     }
1550     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1551 
1552     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1553 
1554     /* local sweep */
1555     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1556     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1557     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1558   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1559 
1560   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1561 
1562   matin->factorerrortype = mat->A->factorerrortype;
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatPermute_MPIAIJ"
1568 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1569 {
1570   Mat            aA,aB,Aperm;
1571   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1572   PetscScalar    *aa,*ba;
1573   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1574   PetscSF        rowsf,sf;
1575   IS             parcolp = NULL;
1576   PetscBool      done;
1577   PetscErrorCode ierr;
1578 
1579   PetscFunctionBegin;
1580   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1581   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1582   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1583   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1584 
1585   /* Invert row permutation to find out where my rows should go */
1586   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1587   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1588   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1589   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1590   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1591   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1592 
1593   /* Invert column permutation to find out where my columns should go */
1594   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1595   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1596   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1597   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1598   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1599   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1600   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1601 
1602   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1603   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1604   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1605 
1606   /* Find out where my gcols should go */
1607   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1608   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1609   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1610   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1611   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1612   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1613   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1614   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1615 
1616   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1617   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1618   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1619   for (i=0; i<m; i++) {
1620     PetscInt row = rdest[i],rowner;
1621     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1622     for (j=ai[i]; j<ai[i+1]; j++) {
1623       PetscInt cowner,col = cdest[aj[j]];
1624       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1625       if (rowner == cowner) dnnz[i]++;
1626       else onnz[i]++;
1627     }
1628     for (j=bi[i]; j<bi[i+1]; j++) {
1629       PetscInt cowner,col = gcdest[bj[j]];
1630       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1631       if (rowner == cowner) dnnz[i]++;
1632       else onnz[i]++;
1633     }
1634   }
1635   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1636   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1637   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1639   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1640 
1641   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1642   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1643   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1644   for (i=0; i<m; i++) {
1645     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1646     PetscInt j0,rowlen;
1647     rowlen = ai[i+1] - ai[i];
1648     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so insert the values in batches of at most m */
1649       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1650       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1651     }
1652     rowlen = bi[i+1] - bi[i];
1653     for (j0=j=0; j<rowlen; j0=j) {
1654       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1655       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1656     }
1657   }
1658   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1660   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1661   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1662   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1663   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1664   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1665   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1666   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1667   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1668   *B = Aperm;
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1674 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1675 {
1676   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1677   PetscErrorCode ierr;
1678 
1679   PetscFunctionBegin;
1680   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1681   if (ghosts) *ghosts = aij->garray;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1688 {
1689   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1690   Mat            A    = mat->A,B = mat->B;
1691   PetscErrorCode ierr;
1692   PetscReal      isend[5],irecv[5];
1693 
1694   PetscFunctionBegin;
1695   info->block_size = 1.0;
1696   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1697 
1698   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1699   isend[3] = info->memory;  isend[4] = info->mallocs;
1700 
1701   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1704   isend[3] += info->memory;  isend[4] += info->mallocs;
1705   if (flag == MAT_LOCAL) {
1706     info->nz_used      = isend[0];
1707     info->nz_allocated = isend[1];
1708     info->nz_unneeded  = isend[2];
1709     info->memory       = isend[3];
1710     info->mallocs      = isend[4];
1711   } else if (flag == MAT_GLOBAL_MAX) {
1712     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1713 
1714     info->nz_used      = irecv[0];
1715     info->nz_allocated = irecv[1];
1716     info->nz_unneeded  = irecv[2];
1717     info->memory       = irecv[3];
1718     info->mallocs      = irecv[4];
1719   } else if (flag == MAT_GLOBAL_SUM) {
1720     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1721 
1722     info->nz_used      = irecv[0];
1723     info->nz_allocated = irecv[1];
1724     info->nz_unneeded  = irecv[2];
1725     info->memory       = irecv[3];
1726     info->mallocs      = irecv[4];
1727   }
1728   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1729   info->fill_ratio_needed = 0;
1730   info->factor_mallocs    = 0;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatSetOption_MPIAIJ"
1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1737 {
1738   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   switch (op) {
1743   case MAT_NEW_NONZERO_LOCATIONS:
1744   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1745   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1746   case MAT_KEEP_NONZERO_PATTERN:
1747   case MAT_NEW_NONZERO_LOCATION_ERR:
1748   case MAT_USE_INODES:
1749   case MAT_IGNORE_ZERO_ENTRIES:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_ROW_ORIENTED:
1755     MatCheckPreallocated(A,1);
1756     a->roworiented = flg;
1757 
1758     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1759     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1760     break;
1761   case MAT_NEW_DIAGONALS:
1762     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1763     break;
1764   case MAT_IGNORE_OFF_PROC_ENTRIES:
1765     a->donotstash = flg;
1766     break;
1767   case MAT_SPD:
1768     A->spd_set = PETSC_TRUE;
1769     A->spd     = flg;
1770     if (flg) {
1771       A->symmetric                  = PETSC_TRUE;
1772       A->structurally_symmetric     = PETSC_TRUE;
1773       A->symmetric_set              = PETSC_TRUE;
1774       A->structurally_symmetric_set = PETSC_TRUE;
1775     }
1776     break;
1777   case MAT_SYMMETRIC:
1778     MatCheckPreallocated(A,1);
1779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1780     break;
1781   case MAT_STRUCTURALLY_SYMMETRIC:
1782     MatCheckPreallocated(A,1);
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_HERMITIAN:
1786     MatCheckPreallocated(A,1);
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     break;
1789   case MAT_SYMMETRY_ETERNAL:
1790     MatCheckPreallocated(A,1);
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_SUBMAT_SINGLEIS:
1794     A->submat_singleis = flg;
1795     break;
1796   default:
1797     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1798   }
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatGetRow_MPIAIJ"
1804 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1805 {
1806   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1807   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1808   PetscErrorCode ierr;
1809   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1810   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1811   PetscInt       *cmap,*idx_p;
1812 
1813   PetscFunctionBegin;
1814   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1815   mat->getrowactive = PETSC_TRUE;
1816 
1817   if (!mat->rowvalues && (idx || v)) {
1818     /*
1819         allocate enough space to hold information from the longest row.
1820     */
1821     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1822     PetscInt   max = 1,tmp;
1823     for (i=0; i<matin->rmap->n; i++) {
1824       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1825       if (max < tmp) max = tmp;
1826     }
1827     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1828   }
1829 
1830   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1831   lrow = row - rstart;
1832 
1833   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1834   if (!v)   {pvA = 0; pvB = 0;}
1835   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1836   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1837   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1838   nztot = nzA + nzB;
1839 
1840   cmap = mat->garray;
1841   if (v  || idx) {
1842     if (nztot) {
1843       /* Sort by increasing column numbers, assuming A and B already sorted */
1844       PetscInt imark = -1;
1845       if (v) {
1846         *v = v_p = mat->rowvalues;
1847         for (i=0; i<nzB; i++) {
1848           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1849           else break;
1850         }
1851         imark = i;
1852         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1853         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1854       }
1855       if (idx) {
1856         *idx = idx_p = mat->rowindices;
1857         if (imark > -1) {
1858           for (i=0; i<imark; i++) {
1859             idx_p[i] = cmap[cworkB[i]];
1860           }
1861         } else {
1862           for (i=0; i<nzB; i++) {
1863             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1864             else break;
1865           }
1866           imark = i;
1867         }
1868         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1869         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1870       }
1871     } else {
1872       if (idx) *idx = 0;
1873       if (v)   *v   = 0;
1874     }
1875   }
1876   *nz  = nztot;
1877   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1884 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1885 {
1886   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1887 
1888   PetscFunctionBegin;
1889   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1890   aij->getrowactive = PETSC_FALSE;
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatNorm_MPIAIJ"
1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1897 {
1898   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1899   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1900   PetscErrorCode ierr;
1901   PetscInt       i,j,cstart = mat->cmap->rstart;
1902   PetscReal      sum = 0.0;
1903   MatScalar      *v;
1904 
1905   PetscFunctionBegin;
1906   if (aij->size == 1) {
1907     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1908   } else {
1909     if (type == NORM_FROBENIUS) {
1910       v = amat->a;
1911       for (i=0; i<amat->nz; i++) {
1912         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1913       }
1914       v = bmat->a;
1915       for (i=0; i<bmat->nz; i++) {
1916         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1917       }
1918       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1919       *norm = PetscSqrtReal(*norm);
1920       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1921     } else if (type == NORM_1) { /* max column norm */
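           /* Column sums are accumulated into a dense work array of length cmap->N (the
              global number of columns) on every process and then combined with an allreduce,
              so this branch is not memory scalable for matrices with very many columns. */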
1922       PetscReal *tmp,*tmp2;
1923       PetscInt  *jj,*garray = aij->garray;
1924       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1925       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1926       *norm = 0.0;
1927       v     = amat->a; jj = amat->j;
1928       for (j=0; j<amat->nz; j++) {
1929         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1930       }
1931       v = bmat->a; jj = bmat->j;
1932       for (j=0; j<bmat->nz; j++) {
1933         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1934       }
1935       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936       for (j=0; j<mat->cmap->N; j++) {
1937         if (tmp2[j] > *norm) *norm = tmp2[j];
1938       }
1939       ierr = PetscFree(tmp);CHKERRQ(ierr);
1940       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1941       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1942     } else if (type == NORM_INFINITY) { /* max row norm */
1943       PetscReal ntemp = 0.0;
1944       for (j=0; j<aij->A->rmap->n; j++) {
1945         v   = amat->a + amat->i[j];
1946         sum = 0.0;
1947         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         v = bmat->a + bmat->i[j];
1951         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1952           sum += PetscAbsScalar(*v); v++;
1953         }
1954         if (sum > ntemp) ntemp = sum;
1955       }
1956       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1957       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1958     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1959   }
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatTranspose_MPIAIJ"
1965 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1966 {
1967   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1968   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1969   PetscErrorCode ierr;
1970   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1971   PetscInt       cstart = A->cmap->rstart,ncol;
1972   Mat            B;
1973   MatScalar      *array;
1974 
1975   PetscFunctionBegin;
1976   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"In-place transpose is only supported for square matrices");
1977 
1978   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1979   ai = Aloc->i; aj = Aloc->j;
1980   bi = Bloc->i; bj = Bloc->j;
1981   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1982     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1983     PetscSFNode          *oloc;
1984     PETSC_UNUSED PetscSF sf;
1985 
1986     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1987     /* compute d_nnz for preallocation */
1988     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1989     for (i=0; i<ai[ma]; i++) {
1990       d_nnz[aj[i]]++;
1991       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1992     }
1993     /* compute local off-diagonal contributions */
1994     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1995     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1996     /* map those to global */
1997     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1998     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1999     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2000     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2001     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2003     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2004 
2005     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2006     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2007     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2008     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2009     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2010     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2011   } else {
2012     B    = *matout;
2013     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2014     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2015   }
2016 
2017   /* copy over the A part */
2018   array = Aloc->a;
2019   row   = A->rmap->rstart;
2020   for (i=0; i<ma; i++) {
2021     ncol = ai[i+1]-ai[i];
2022     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2023     row++;
2024     array += ncol; aj += ncol;
2025   }
2026   aj = Aloc->j;
2027   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2028 
2029   /* copy over the B part */
2030   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2031   array = Bloc->a;
2032   row   = A->rmap->rstart;
2033   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2034   cols_tmp = cols;
2035   for (i=0; i<mb; i++) {
2036     ncol = bi[i+1]-bi[i];
2037     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2038     row++;
2039     array += ncol; cols_tmp += ncol;
2040   }
2041   ierr = PetscFree(cols);CHKERRQ(ierr);
2042 
2043   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2045   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2046     *matout = B;
2047   } else {
2048     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2049   }
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2055 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2056 {
2057   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2058   Mat            a    = aij->A,b = aij->B;
2059   PetscErrorCode ierr;
2060   PetscInt       s1,s2,s3;
2061 
2062   PetscFunctionBegin;
2063   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2064   if (rr) {
2065     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2066     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2067     /* Overlap communication with computation. */
2068     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2069   }
2070   if (ll) {
2071     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2072     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2073     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2074   }
2075   /* scale the diagonal block */
2076   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2077 
2078   if (rr) {
2079     /* Do a scatter end and then right scale the off-diagonal block */
2080     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2081     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2088 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2089 {
2090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2091   PetscErrorCode ierr;
2092 
2093   PetscFunctionBegin;
2094   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatEqual_MPIAIJ"
2100 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2101 {
2102   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2103   Mat            a,b,c,d;
2104   PetscBool      flg;
2105   PetscErrorCode ierr;
2106 
2107   PetscFunctionBegin;
2108   a = matA->A; b = matA->B;
2109   c = matB->A; d = matB->B;
2110 
2111   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2112   if (flg) {
2113     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2114   }
2115   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatCopy_MPIAIJ"
2121 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2122 {
2123   PetscErrorCode ierr;
2124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2125   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2126 
2127   PetscFunctionBegin;
2128   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2129   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2130     /* because of the column compression in the off-processor part of the matrix a->B,
2131        the number of columns in a->B and b->B may be different, hence we cannot call
2132        the MatCopy() directly on the two parts. If need be, we can provide a more
2133        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2134        then copying the submatrices */
2135     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2136   } else {
2137     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2138     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2139   }
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatSetUp_MPIAIJ"
2145 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2146 {
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 /*
2155    Computes the number of nonzeros per row needed for preallocation when X and Y
2156    have different nonzero structure.
2157 */
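     /*
        Illustrative example (not taken from the source): if row i of X has global columns
        {1,4,7} and row i of Y has global columns {2,4,9}, the merged pattern is {1,2,4,7,9},
        so nnz[i] = 5.  The loop below walks the two sorted column lists in tandem and counts
        each distinct global column once.
     */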
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2160 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2161 {
2162   PetscInt       i,j,k,nzx,nzy;
2163 
2164   PetscFunctionBegin;
2165   /* Set the number of nonzeros in the new matrix */
2166   for (i=0; i<m; i++) {
2167     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2168     nzx = xi[i+1] - xi[i];
2169     nzy = yi[i+1] - yi[i];
2170     nnz[i] = 0;
2171     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2172       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2173       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2174       nnz[i]++;
2175     }
2176     for (; k<nzy; k++) nnz[i]++;
2177   }
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2184 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2185 {
2186   PetscErrorCode ierr;
2187   PetscInt       m = Y->rmap->N;
2188   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2189   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2190 
2191   PetscFunctionBegin;
2192   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatAXPY_MPIAIJ"
2198 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2199 {
2200   PetscErrorCode ierr;
2201   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2202   PetscBLASInt   bnz,one=1;
2203   Mat_SeqAIJ     *x,*y;
2204 
2205   PetscFunctionBegin;
2206   if (str == SAME_NONZERO_PATTERN) {
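       /* With identical nonzero patterns the value arrays of X and Y line up entry for entry,
          so the diagonal and off-diagonal blocks can each be updated with a single BLAS axpy. */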
2207     PetscScalar alpha = a;
2208     x    = (Mat_SeqAIJ*)xx->A->data;
2209     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2210     y    = (Mat_SeqAIJ*)yy->A->data;
2211     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2212     x    = (Mat_SeqAIJ*)xx->B->data;
2213     y    = (Mat_SeqAIJ*)yy->B->data;
2214     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2215     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2216     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2217   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of those of Y */
2218     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2219   } else {
2220     Mat      B;
2221     PetscInt *nnz_d,*nnz_o;
2222     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2223     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2224     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2225     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2226     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2227     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2228     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2230     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2231     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2232     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2233     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2235     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2236   }
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2241 
2242 #undef __FUNCT__
2243 #define __FUNCT__ "MatConjugate_MPIAIJ"
2244 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2245 {
2246 #if defined(PETSC_USE_COMPLEX)
2247   PetscErrorCode ierr;
2248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2249 
2250   PetscFunctionBegin;
2251   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2252   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2253 #else
2254   PetscFunctionBegin;
2255 #endif
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatRealPart_MPIAIJ"
2261 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2262 {
2263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2264   PetscErrorCode ierr;
2265 
2266   PetscFunctionBegin;
2267   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2268   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2274 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2275 {
2276   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2277   PetscErrorCode ierr;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2281   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2287 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2288 {
2289   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2290   PetscErrorCode ierr;
2291   PetscInt       i,*idxb = 0;
2292   PetscScalar    *va,*vb;
2293   Vec            vtmp;
2294 
2295   PetscFunctionBegin;
2296   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2297   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2298   if (idx) {
2299     for (i=0; i<A->rmap->n; i++) {
2300       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2301     }
2302   }
2303 
2304   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2305   if (idx) {
2306     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2307   }
2308   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2309   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2310 
2311   for (i=0; i<A->rmap->n; i++) {
2312     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2313       va[i] = vb[i];
2314       if (idx) idx[i] = a->garray[idxb[i]];
2315     }
2316   }
2317 
2318   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2319   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2320   ierr = PetscFree(idxb);CHKERRQ(ierr);
2321   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2327 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2328 {
2329   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2330   PetscErrorCode ierr;
2331   PetscInt       i,*idxb = 0;
2332   PetscScalar    *va,*vb;
2333   Vec            vtmp;
2334 
2335   PetscFunctionBegin;
2336   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2337   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2338   if (idx) {
2339     for (i=0; i<A->rmap->n; i++) { /* local rows; v and idx have A->rmap->n entries, as in MatGetRowMaxAbs_MPIAIJ() */
2340       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2341     }
2342   }
2343 
2344   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2345   if (idx) {
2346     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2347   }
2348   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2349   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2350 
2351   for (i=0; i<A->rmap->n; i++) {
2352     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2353       va[i] = vb[i];
2354       if (idx) idx[i] = a->garray[idxb[i]];
2355     }
2356   }
2357 
2358   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2359   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2360   ierr = PetscFree(idxb);CHKERRQ(ierr);
2361   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2367 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2368 {
2369   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2370   PetscInt       n      = A->rmap->n;
2371   PetscInt       cstart = A->cmap->rstart;
2372   PetscInt       *cmap  = mat->garray;
2373   PetscInt       *diagIdx, *offdiagIdx;
2374   Vec            diagV, offdiagV;
2375   PetscScalar    *a, *diagA, *offdiagA;
2376   PetscInt       r;
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2382   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2383   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2384   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2385   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2386   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2387   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2388   for (r = 0; r < n; ++r) {
2389     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2390       a[r]   = diagA[r];
2391       idx[r] = cstart + diagIdx[r];
2392     } else {
2393       a[r]   = offdiagA[r];
2394       idx[r] = cmap[offdiagIdx[r]];
2395     }
2396   }
2397   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2398   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2399   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2400   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2401   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2402   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2408 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2409 {
2410   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2411   PetscInt       n      = A->rmap->n;
2412   PetscInt       cstart = A->cmap->rstart;
2413   PetscInt       *cmap  = mat->garray;
2414   PetscInt       *diagIdx, *offdiagIdx;
2415   Vec            diagV, offdiagV;
2416   PetscScalar    *a, *diagA, *offdiagA;
2417   PetscInt       r;
2418   PetscErrorCode ierr;
2419 
2420   PetscFunctionBegin;
2421   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2423   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2424   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2425   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2426   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2427   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2428   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2429   for (r = 0; r < n; ++r) {
2430     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2431       a[r]   = diagA[r];
2432       idx[r] = cstart + diagIdx[r];
2433     } else {
2434       a[r]   = offdiagA[r];
2435       idx[r] = cmap[offdiagIdx[r]];
2436     }
2437   }
2438   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2439   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2440   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2441   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2442   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2443   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2449 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2450 {
2451   PetscErrorCode ierr;
2452   Mat            *dummy;
2453 
2454   PetscFunctionBegin;
2455   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2456   *newmat = *dummy;
2457   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2463 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2464 {
2465   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2466   PetscErrorCode ierr;
2467 
2468   PetscFunctionBegin;
2469   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2470   A->factorerrortype = a->A->factorerrortype;
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2476 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2477 {
2478   PetscErrorCode ierr;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2480 
2481   PetscFunctionBegin;
2482   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2483   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2484   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2492 {
2493   PetscFunctionBegin;
2494   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2495   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2501 /*@
2502    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2503 
2504    Collective on Mat
2505 
2506    Input Parameters:
2507 +    A - the matrix
2508 -    sc - PETSC_TRUE to use the scalable algorithm (the default is not to use it)
2509 
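        Example usage (a sketch; assumes A is an assembled MATMPIAIJ matrix):
     .vb
        ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     .ve
        The same choice can be made at runtime with the option -mat_increase_overlap_scalable
        (see MatSetFromOptions_MPIAIJ() below).
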
2510    Level: advanced
2511 
2512 @*/
2513 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2514 {
2515   PetscErrorCode       ierr;
2516 
2517   PetscFunctionBegin;
2518   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2524 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2525 {
2526   PetscErrorCode       ierr;
2527   PetscBool            sc = PETSC_FALSE,flg;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2531   ierr = PetscObjectOptionsBegin((PetscObject)A);
2532     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2533     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2534     if (flg) {
2535       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2536     }
2537   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2538   PetscFunctionReturn(0);
2539 }
2540 
2541 #undef __FUNCT__
2542 #define __FUNCT__ "MatShift_MPIAIJ"
2543 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2544 {
2545   PetscErrorCode ierr;
2546   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2547   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2548 
2549   PetscFunctionBegin;
2550   if (!Y->preallocated) {
2551     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2552   } else if (!aij->nz) {
2553     PetscInt nonew = aij->nonew;
2554     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2555     aij->nonew = nonew;
2556   }
2557   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2563 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2564 {
2565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2570   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2571   if (d) {
2572     PetscInt rstart;
2573     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2574     *d += rstart;
2575 
2576   }
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 
2581 /* -------------------------------------------------------------------*/
2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2583                                        MatGetRow_MPIAIJ,
2584                                        MatRestoreRow_MPIAIJ,
2585                                        MatMult_MPIAIJ,
2586                                 /* 4*/ MatMultAdd_MPIAIJ,
2587                                        MatMultTranspose_MPIAIJ,
2588                                        MatMultTransposeAdd_MPIAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                 /*10*/ 0,
2593                                        0,
2594                                        0,
2595                                        MatSOR_MPIAIJ,
2596                                        MatTranspose_MPIAIJ,
2597                                 /*15*/ MatGetInfo_MPIAIJ,
2598                                        MatEqual_MPIAIJ,
2599                                        MatGetDiagonal_MPIAIJ,
2600                                        MatDiagonalScale_MPIAIJ,
2601                                        MatNorm_MPIAIJ,
2602                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2603                                        MatAssemblyEnd_MPIAIJ,
2604                                        MatSetOption_MPIAIJ,
2605                                        MatZeroEntries_MPIAIJ,
2606                                 /*24*/ MatZeroRows_MPIAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                 /*29*/ MatSetUp_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                        MatGetDiagonalBlock_MPIAIJ,
2615                                        0,
2616                                 /*34*/ MatDuplicate_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                 /*39*/ MatAXPY_MPIAIJ,
2622                                        MatGetSubMatrices_MPIAIJ,
2623                                        MatIncreaseOverlap_MPIAIJ,
2624                                        MatGetValues_MPIAIJ,
2625                                        MatCopy_MPIAIJ,
2626                                 /*44*/ MatGetRowMax_MPIAIJ,
2627                                        MatScale_MPIAIJ,
2628                                        MatShift_MPIAIJ,
2629                                        MatDiagonalSet_MPIAIJ,
2630                                        MatZeroRowsColumns_MPIAIJ,
2631                                 /*49*/ MatSetRandom_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2637                                        0,
2638                                        MatSetUnfactored_MPIAIJ,
2639                                        MatPermute_MPIAIJ,
2640                                        0,
2641                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2642                                        MatDestroy_MPIAIJ,
2643                                        MatView_MPIAIJ,
2644                                        0,
2645                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2646                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2652                                        MatGetRowMinAbs_MPIAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*75*/ MatFDColoringApply_AIJ,
2658                                        MatSetFromOptions_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        MatFindZeroDiagonals_MPIAIJ,
2662                                 /*80*/ 0,
2663                                        0,
2664                                        0,
2665                                 /*83*/ MatLoad_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2672                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2673                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2674                                        MatPtAP_MPIAIJ_MPIAIJ,
2675                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2676                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*99*/ 0,
2682                                        0,
2683                                        0,
2684                                        MatConjugate_MPIAIJ,
2685                                        0,
2686                                 /*104*/MatSetValuesRow_MPIAIJ,
2687                                        MatRealPart_MPIAIJ,
2688                                        MatImaginaryPart_MPIAIJ,
2689                                        0,
2690                                        0,
2691                                 /*109*/0,
2692                                        0,
2693                                        MatGetRowMin_MPIAIJ,
2694                                        0,
2695                                        MatMissingDiagonal_MPIAIJ,
2696                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2697                                        0,
2698                                        MatGetGhosts_MPIAIJ,
2699                                        0,
2700                                        0,
2701                                 /*119*/0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        MatGetMultiProcBlock_MPIAIJ,
2706                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2707                                        MatGetColumnNorms_MPIAIJ,
2708                                        MatInvertBlockDiagonal_MPIAIJ,
2709                                        0,
2710                                        MatGetSubMatricesMPI_MPIAIJ,
2711                                 /*129*/0,
2712                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2713                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2714                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2715                                        0,
2716                                 /*134*/0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                 /*139*/MatSetBlockSizes_MPIAIJ,
2722                                        0,
2723                                        0,
2724                                        MatFDColoringSetUp_MPIXAIJ,
2725                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2726                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2727 };
2728 
2729 /* ----------------------------------------------------------------------------------------*/
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2733 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2734 {
2735   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2740   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2746 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2747 {
2748   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2753   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2759 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763 
2764   PetscFunctionBegin;
2765   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   b = (Mat_MPIAIJ*)B->data;
2768 
2769 #if defined(PETSC_USE_CTABLE)
2770   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2771 #else
2772   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2773 #endif
2774   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2775   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2776   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2777 
2778   /* Because B may have been resized we simply destroy it and create a new one each time */
2779   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2780   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2781   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2782   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2783   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2784   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2785 
2786   if (!B->preallocated) {
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2789     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2790     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2791     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2792   }
2793 
2794   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2795   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2796   B->preallocated  = PETSC_TRUE;
2797   B->was_assembled = PETSC_FALSE;
2798   B->assembled     = PETSC_FALSE;
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2805 {
2806   Mat            mat;
2807   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   *newmat = 0;
2812   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2813   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2814   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2815   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2816   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2817   a       = (Mat_MPIAIJ*)mat->data;
2818 
2819   mat->factortype   = matin->factortype;
2820   mat->assembled    = PETSC_TRUE;
2821   mat->insertmode   = NOT_SET_VALUES;
2822   mat->preallocated = PETSC_TRUE;
2823 
2824   a->size         = oldmat->size;
2825   a->rank         = oldmat->rank;
2826   a->donotstash   = oldmat->donotstash;
2827   a->roworiented  = oldmat->roworiented;
2828   a->rowindices   = 0;
2829   a->rowvalues    = 0;
2830   a->getrowactive = PETSC_FALSE;
2831 
2832   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2834 
2835   if (oldmat->colmap) {
2836 #if defined(PETSC_USE_CTABLE)
2837     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2838 #else
2839     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2840     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2841     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842 #endif
2843   } else a->colmap = 0;
2844   if (oldmat->garray) {
2845     PetscInt len;
2846     len  = oldmat->B->cmap->n;
2847     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2848     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2849     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2850   } else a->garray = 0;
2851 
2852   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2853   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2854   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2855   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2856   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2857   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2858   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2859   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2860   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2861   *newmat = mat;
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 
2866 
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatLoad_MPIAIJ"
2869 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2870 {
2871   PetscScalar    *vals,*svals;
2872   MPI_Comm       comm;
2873   PetscErrorCode ierr;
2874   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2875   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2876   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2877   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2878   PetscInt       cend,cstart,n,*rowners;
2879   int            fd;
2880   PetscInt       bs = newMat->rmap->bs;
2881 
2882   PetscFunctionBegin;
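  /* Loading strategy: rank 0 reads the header, row lengths, column indices, and values from the binary
     file and ships each other rank its own contiguous block of rows with MPIULong_Send() */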
2883   /* force binary viewer to load .info file if it has not yet done so */
2884   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2885   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2888   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2889   if (!rank) {
2890     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2891     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2892     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2893   }
2894 
2895   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2896   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2897   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2898   if (bs < 0) bs = 1;
2899 
2900   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2901   M    = header[1]; N = header[2];
2902 
2903   /* If global sizes are set, check if they are consistent with that given in the file */
2904   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2905   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2906 
2907   /* determine ownership of all (block) rows */
2908   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2909   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2910   else m = newMat->rmap->n; /* Set by user */
2911 
2912   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2913   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2914 
2915   /* First process needs enough room for process with most rows */
2916   if (!rank) {
2917     mmax = rowners[1];
2918     for (i=2; i<=size; i++) {
2919       mmax = PetscMax(mmax, rowners[i]);
2920     }
2921   } else mmax = -1;             /* unused, but compilers complain */
2922 
2923   rowners[0] = 0;
2924   for (i=2; i<=size; i++) {
2925     rowners[i] += rowners[i-1];
2926   }
2927   rstart = rowners[rank];
2928   rend   = rowners[rank+1];
2929 
2930   /* distribute row lengths to all processors */
2931   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2932   if (!rank) {
2933     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2934     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2935     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2936     for (j=0; j<m; j++) {
2937       procsnz[0] += ourlens[j];
2938     }
2939     for (i=1; i<size; i++) {
2940       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2941       /* calculate the number of nonzeros on each processor */
2942       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2943         procsnz[i] += rowlengths[j];
2944       }
2945       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2946     }
2947     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2948   } else {
2949     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2950   }
2951 
2952   if (!rank) {
2953     /* determine max buffer needed and allocate it */
2954     maxnz = 0;
2955     for (i=0; i<size; i++) {
2956       maxnz = PetscMax(maxnz,procsnz[i]);
2957     }
2958     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2959 
2960     /* read in my part of the matrix column indices  */
2961     nz   = procsnz[0];
2962     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2963     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2964 
2965     /* read in everyone else's and ship it off */
2966     for (i=1; i<size; i++) {
2967       nz   = procsnz[i];
2968       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2969       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2970     }
2971     ierr = PetscFree(cols);CHKERRQ(ierr);
2972   } else {
2973     /* determine buffer space needed for message */
2974     nz = 0;
2975     for (i=0; i<m; i++) {
2976       nz += ourlens[i];
2977     }
2978     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2979 
2980     /* receive message of column indices */
2981     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2982   }
2983 
2984   /* determine column ownership if matrix is not square */
2985   if (N != M) {
2986     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2987     else n = newMat->cmap->n;
2988     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2989     cstart = cend - n;
2990   } else {
2991     cstart = rstart;
2992     cend   = rend;
2993     n      = cend - cstart;
2994   }
2995 
2996   /* loop over local rows, determining number of off diagonal entries */
2997   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2998   jj   = 0;
2999   for (i=0; i<m; i++) {
3000     for (j=0; j<ourlens[i]; j++) {
3001       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3002       jj++;
3003     }
3004   }
3005 
3006   for (i=0; i<m; i++) {
3007     ourlens[i] -= offlens[i];
3008   }
3009   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3010 
3011   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3012 
3013   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3014 
3015   for (i=0; i<m; i++) {
3016     ourlens[i] += offlens[i];
3017   }
3018 
3019   if (!rank) {
3020     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3021 
3022     /* read in my part of the matrix numerical values  */
3023     nz   = procsnz[0];
3024     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3025 
3026     /* insert into matrix */
3027     jj      = rstart;
3028     smycols = mycols;
3029     svals   = vals;
3030     for (i=0; i<m; i++) {
3031       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3032       smycols += ourlens[i];
3033       svals   += ourlens[i];
3034       jj++;
3035     }
3036 
3037     /* read in other processors and ship out */
3038     for (i=1; i<size; i++) {
3039       nz   = procsnz[i];
3040       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3041       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3042     }
3043     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3044   } else {
3045     /* receive numeric values */
3046     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3047 
3048     /* receive message of values */
3049     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3050 
3051     /* insert into matrix */
3052     jj      = rstart;
3053     smycols = mycols;
3054     svals   = vals;
3055     for (i=0; i<m; i++) {
3056       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3057       smycols += ourlens[i];
3058       svals   += ourlens[i];
3059       jj++;
3060     }
3061   }
3062   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3063   ierr = PetscFree(vals);CHKERRQ(ierr);
3064   ierr = PetscFree(mycols);CHKERRQ(ierr);
3065   ierr = PetscFree(rowners);CHKERRQ(ierr);
3066   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3073 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3075 {
3076   PetscErrorCode ierr;
3077   IS             iscol_local;
3078   PetscInt       csize;
3079 
3080   PetscFunctionBegin;
3081   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3082   if (call == MAT_REUSE_MATRIX) {
3083     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3084     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3085   } else {
3086     /* check if we are grabbing all columns*/
3087     PetscBool    isstride;
3088     PetscMPIInt  lisstride = 0,gisstride;
3089     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3090     if (isstride) {
3091       PetscInt  start,len,mstart,mlen;
3092       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3093       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3094       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3095       if (mstart == start && mlen-mstart == len) lisstride = 1;
3096     }
3097     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3098     if (gisstride) {
3099       PetscInt N;
3100       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3101       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3102       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3103       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3104     } else {
3105       PetscInt cbs;
3106       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3107       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3108       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3109     }
3110   }
3111   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3112   if (call == MAT_INITIAL_MATRIX) {
3113     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3114     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3115   }
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3120 #undef __FUNCT__
3121 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3122 /*
3123     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ submatrix
3124   on each process, and then the final result formed by concatenating those local matrices.
3125   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3126 
3127   Note: This requires a sequential iscol with all indices.
3128 */
3129 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3130 {
3131   PetscErrorCode ierr;
3132   PetscMPIInt    rank,size;
3133   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3134   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3135   PetscBool      allcolumns, colflag;
3136   Mat            M,Mreuse;
3137   MatScalar      *vwork,*aa;
3138   MPI_Comm       comm;
3139   Mat_SeqAIJ     *aij;
3140 
3141   PetscFunctionBegin;
3142   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3143   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3144   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3145 
3146   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3147   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3148   if (colflag && ncol == mat->cmap->N) {
3149     allcolumns = PETSC_TRUE;
3150     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3151   } else {
3152     allcolumns = PETSC_FALSE;
3153   }
3154   if (call ==  MAT_REUSE_MATRIX) {
3155     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3156     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3157     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3158   } else {
3159     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3160   }
3161 
3162   /*
3163       m - number of local rows
3164       n - number of columns (same on all processors)
3165       rstart - first row in new global matrix generated
3166   */
3167   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3168   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3169   if (call == MAT_INITIAL_MATRIX) {
3170     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3171     ii  = aij->i;
3172     jj  = aij->j;
3173 
3174     /*
3175         Determine the number of non-zeros in the diagonal and off-diagonal
3176         portions of the matrix in order to do correct preallocation
3177     */
3178 
3179     /* first get start and end of "diagonal" columns */
3180     if (csize == PETSC_DECIDE) {
3181       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3182       if (mglobal == n) { /* square matrix */
3183         nlocal = m;
3184       } else {
3185         nlocal = n/size + ((n % size) > rank);
3186       }
3187     } else {
3188       nlocal = csize;
3189     }
3190     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3191     rstart = rend - nlocal;
3192     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3193 
3194     /* next, compute all the lengths */
3195     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3196     olens = dlens + m;
3197     for (i=0; i<m; i++) {
3198       jend = ii[i+1] - ii[i];
3199       olen = 0;
3200       dlen = 0;
3201       for (j=0; j<jend; j++) {
3202         if (*jj < rstart || *jj >= rend) olen++;
3203         else dlen++;
3204         jj++;
3205       }
3206       olens[i] = olen;
3207       dlens[i] = dlen;
3208     }
3209     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3210     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3211     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3212     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3213     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3214     ierr = PetscFree(dlens);CHKERRQ(ierr);
3215   } else {
3216     PetscInt ml,nl;
3217 
3218     M    = *newmat;
3219     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3220     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3221     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3222     /*
3223          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3224        rather than the slower MatSetValues().
3225     */
3226     M->was_assembled = PETSC_TRUE;
3227     M->assembled     = PETSC_FALSE;
3228   }
3229   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3230   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3231   ii   = aij->i;
3232   jj   = aij->j;
3233   aa   = aij->a;
3234   for (i=0; i<m; i++) {
3235     row   = rstart + i;
3236     nz    = ii[i+1] - ii[i];
3237     cwork = jj;     jj += nz;
3238     vwork = aa;     aa += nz;
3239     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3240   }
3241 
3242   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3243   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3244   *newmat = M;
3245 
3246   /* save submatrix used in processor for next request */
3247   if (call ==  MAT_INITIAL_MATRIX) {
3248     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3249     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3250   }
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 #undef __FUNCT__
3255 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3256 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3257 {
3258   PetscInt       m,cstart, cend,j,nnz,i,d;
3259   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3260   const PetscInt *JJ;
3261   PetscScalar    *values;
3262   PetscErrorCode ierr;
3263 
3264   PetscFunctionBegin;
3265   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3266 
3267   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3268   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3269   m      = B->rmap->n;
3270   cstart = B->cmap->rstart;
3271   cend   = B->cmap->rend;
3272   rstart = B->rmap->rstart;
3273 
3274   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3275 
3276 #if defined(PETSC_USE_DEBUG)
3277   for (i=0; i<m; i++) {
3278     nnz = Ii[i+1]- Ii[i];
3279     JJ  = J + Ii[i];
3280     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3281     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3282     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3283   }
3284 #endif
3285 
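  /* Count, for each local row, the columns that fall in the diagonal block [cstart,cend) versus the
     off-diagonal block, so the MatMPIAIJSetPreallocation() call below preallocates exactly */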
3286   for (i=0; i<m; i++) {
3287     nnz     = Ii[i+1]- Ii[i];
3288     JJ      = J + Ii[i];
3289     nnz_max = PetscMax(nnz_max,nnz);
3290     d       = 0;
3291     for (j=0; j<nnz; j++) {
3292       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3293     }
3294     d_nnz[i] = d;
3295     o_nnz[i] = nnz - d;
3296   }
3297   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3298   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3299 
3300   if (v) values = (PetscScalar*)v;
3301   else {
3302     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3303   }
3304 
3305   for (i=0; i<m; i++) {
3306     ii   = i + rstart;
3307     nnz  = Ii[i+1]- Ii[i];
3308     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3309   }
3310   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3311   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312 
3313   if (!v) {
3314     ierr = PetscFree(values);CHKERRQ(ierr);
3315   }
3316   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3317   PetscFunctionReturn(0);
3318 }
3319 
3320 #undef __FUNCT__
3321 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3322 /*@
3323    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3324    (the default parallel PETSc format).
3325 
3326    Collective on MPI_Comm
3327 
3328    Input Parameters:
3329 +  B - the matrix
3330 .  i - the indices into j for the start of each local row (starts with zero)
3331 .  j - the column indices for each local row (starts with zero)
3332 -  v - optional values in the matrix
3333 
3334    Level: developer
3335 
3336    Notes:
3337        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3338      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3339      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3340 
3341        The i and j indices are 0 based, and the entries of i are offsets into the local j (and v) arrays.
3342 
3343        The format used for the sparse matrix input is equivalent to a
3344     row-major ordering, i.e., for the following matrix the input data expected is
3345     as shown:
3346 
3347 $        1 0 0
3348 $        2 0 3     P0
3349 $       -------
3350 $        4 5 6     P1
3351 $
3352 $     Process0 [P0]: rows_owned=[0,1]
3353 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3354 $        j =  {0,0,2}  [size = 3]
3355 $        v =  {1,2,3}  [size = 3]
3356 $
3357 $     Process1 [P1]: rows_owned=[2]
3358 $        i =  {0,3}    [size = nrow+1  = 1+1]
3359 $        j =  {0,1,2}  [size = 3]
3360 $        v =  {4,5,6}  [size = 3]
3361 
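   As a concrete sketch, process 0 in the example above could set up and call (error checking as usual;
   B created with MatCreate() on the parallel communicator beforehand):
$     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
$     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
$     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
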
3362 .keywords: matrix, aij, compressed row, sparse, parallel
3363 
3364 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3365           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3366 @*/
3367 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3368 {
3369   PetscErrorCode ierr;
3370 
3371   PetscFunctionBegin;
3372   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3373   PetscFunctionReturn(0);
3374 }
3375 
3376 #undef __FUNCT__
3377 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3378 /*@C
3379    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3380    (the default parallel PETSc format).  For good matrix assembly performance
3381    the user should preallocate the matrix storage by setting the parameters
3382    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3383    performance can be increased by more than a factor of 50.
3384 
3385    Collective on MPI_Comm
3386 
3387    Input Parameters:
3388 +  B - the matrix
3389 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3390            (same value is used for all local rows)
3391 .  d_nnz - array containing the number of nonzeros in the various rows of the
3392            DIAGONAL portion of the local submatrix (possibly different for each row)
3393            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3394            The size of this array is equal to the number of local rows, i.e 'm'.
3395            For matrices that will be factored, you must leave room for (and set)
3396            the diagonal entry even if it is zero.
3397 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3398            submatrix (same value is used for all local rows).
3399 -  o_nnz - array containing the number of nonzeros in the various rows of the
3400            OFF-DIAGONAL portion of the local submatrix (possibly different for
3401            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3402            structure. The size of this array is equal to the number
3403            of local rows, i.e 'm'.
3404 
3405    If the *_nnz parameter is given then the *_nz parameter is ignored
3406 
3407    The AIJ format (also called the Yale sparse matrix format or
3408    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3409    storage.  The stored row and column indices begin with zero.
3410    See Users-Manual: ch_mat for details.
3411 
3412    The parallel matrix is partitioned such that the first m0 rows belong to
3413    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3414    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3415 
3416    The DIAGONAL portion of the local submatrix of a processor can be defined
3417    as the submatrix which is obtained by extracting the part corresponding to
3418    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3419    first row that belongs to the processor, r2 is the last row belonging to
3420    this processor, and c1-c2 is the range of indices of the local part of a
3421    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3422    common case of a square matrix, the row and column ranges are the same and
3423    the DIAGONAL part is also square. The remaining portion of the local
3424    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3425 
3426    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3427 
3428    You can call MatGetInfo() to get information on how effective the preallocation was;
3429    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3430    You can also run with the option -info and look for messages with the string
3431    malloc in them to see if additional memory allocation was needed.
3432 
3433    Example usage:
3434 
3435    Consider the following 8x8 matrix with 34 non-zero values, that is
3436    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3437    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3438    as follows:
3439 
3440 .vb
3441             1  2  0  |  0  3  0  |  0  4
3442     Proc0   0  5  6  |  7  0  0  |  8  0
3443             9  0 10  | 11  0  0  | 12  0
3444     -------------------------------------
3445            13  0 14  | 15 16 17  |  0  0
3446     Proc1   0 18  0  | 19 20 21  |  0  0
3447             0  0  0  | 22 23  0  | 24  0
3448     -------------------------------------
3449     Proc2  25 26 27  |  0  0 28  | 29  0
3450            30  0  0  | 31 32 33  |  0 34
3451 .ve
3452 
3453    This can be represented as a collection of submatrices as:
3454 
3455 .vb
3456       A B C
3457       D E F
3458       G H I
3459 .ve
3460 
3461    Where the submatrices A,B,C are owned by proc0, D,E,F are
3462    owned by proc1, G,H,I are owned by proc2.
3463 
3464    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3465    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3466    The 'M','N' parameters are 8,8, and have the same values on all procs.
3467 
3468    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3469    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3470    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3471    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3472    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3473    matrix, and [DF] as another SeqAIJ matrix.
3474 
3475    When d_nz, o_nz parameters are specified, d_nz storage elements are
3476    allocated for every row of the local diagonal submatrix, and o_nz
3477    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3478    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3479    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3480    In this case, the values of d_nz,o_nz are:
3481 .vb
3482      proc0 : dnz = 2, o_nz = 2
3483      proc1 : dnz = 3, o_nz = 2
3484      proc2 : dnz = 1, o_nz = 4
3485 .ve
3486    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3487    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3488    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3489    34 values.
3490 
3491    When d_nnz, o_nnz parameters are specified, the storage is specified
3492    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3493    In the above case the values for d_nnz,o_nnz are:
3494 .vb
3495      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3496      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3497      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3498 .ve
3499    Here the space allocated is the sum of all the above values, i.e., 34, and
3500    hence pre-allocation is perfect.
3501 
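   A sketch of the corresponding calls on proc0 (error checking as usual) is either the exact per-row form
$     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
$     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
   or the simpler single-value form
$     ierr = MatMPIAIJSetPreallocation(B,2,NULL,2,NULL);CHKERRQ(ierr);
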
3502    Level: intermediate
3503 
3504 .keywords: matrix, aij, compressed row, sparse, parallel
3505 
3506 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3507           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3508 @*/
3509 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3510 {
3511   PetscErrorCode ierr;
3512 
3513   PetscFunctionBegin;
3514   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3515   PetscValidType(B,1);
3516   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3517   PetscFunctionReturn(0);
3518 }
3519 
3520 #undef __FUNCT__
3521 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3522 /*@
3523      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3524          CSR format the local rows.
3525 
3526    Collective on MPI_Comm
3527 
3528    Input Parameters:
3529 +  comm - MPI communicator
3530 .  m - number of local rows (Cannot be PETSC_DECIDE)
3531 .  n - This value should be the same as the local size used in creating the
3532        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3533        calculated if N is given) For square matrices n is almost always m.
3534 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3535 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3536 .   i - row indices
3537 .   j - column indices
3538 -   a - matrix values
3539 
3540    Output Parameter:
3541 .   mat - the matrix
3542 
3543    Level: intermediate
3544 
3545    Notes:
3546        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3547      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3548      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3549 
3550        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
3551 
3552        The format used for the sparse matrix input is equivalent to a
3553     row-major ordering, i.e., for the following matrix the input data expected is
3554     as shown:
3555 
3556 $        1 0 0
3557 $        2 0 3     P0
3558 $       -------
3559 $        4 5 6     P1
3560 $
3561 $     Process0 [P0]: rows_owned=[0,1]
3562 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3563 $        j =  {0,0,2}  [size = 3]
3564 $        v =  {1,2,3}  [size = 3]
3565 $
3566 $     Process1 [P1]: rows_owned=[2]
3567 $        i =  {0,3}    [size = nrow+1  = 1+1]
3568 $        j =  {0,1,2}  [size = 3]
3569 $        v =  {4,5,6}  [size = 3]
3570 
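   With those arrays, a sketch of the call on process 0 is
$     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&mat);CHKERRQ(ierr);
   and process 1 makes the same call with its local m = 1 and its own i, j, v arrays.
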
3571 .keywords: matrix, aij, compressed row, sparse, parallel
3572 
3573 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3574           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3575 @*/
3576 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3577 {
3578   PetscErrorCode ierr;
3579 
3580   PetscFunctionBegin;
3581   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3582   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3583   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3584   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3585   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3586   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3587   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3588   PetscFunctionReturn(0);
3589 }
3590 
3591 #undef __FUNCT__
3592 #define __FUNCT__ "MatCreateAIJ"
3593 /*@C
3594    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3595    (the default parallel PETSc format).  For good matrix assembly performance
3596    the user should preallocate the matrix storage by setting the parameters
3597    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3598    performance can be increased by more than a factor of 50.
3599 
3600    Collective on MPI_Comm
3601 
3602    Input Parameters:
3603 +  comm - MPI communicator
3604 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3605            This value should be the same as the local size used in creating the
3606            y vector for the matrix-vector product y = Ax.
3607 .  n - This value should be the same as the local size used in creating the
3608        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3609        calculated if N is given) For square matrices n is almost always m.
3610 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3611 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3612 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3613            (same value is used for all local rows)
3614 .  d_nnz - array containing the number of nonzeros in the various rows of the
3615            DIAGONAL portion of the local submatrix (possibly different for each row)
3616            or NULL, if d_nz is used to specify the nonzero structure.
3617            The size of this array is equal to the number of local rows, i.e 'm'.
3618 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3619            submatrix (same value is used for all local rows).
3620 -  o_nnz - array containing the number of nonzeros in the various rows of the
3621            OFF-DIAGONAL portion of the local submatrix (possibly different for
3622            each row) or NULL, if o_nz is used to specify the nonzero
3623            structure. The size of this array is equal to the number
3624            of local rows, i.e 'm'.
3625 
3626    Output Parameter:
3627 .  A - the matrix
3628 
3629    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3630    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3631    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3632 
3633    Notes:
3634    If the *_nnz parameter is given then the *_nz parameter is ignored
3635 
3636    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3637    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3638    storage requirements for this matrix.
3639 
3640    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3641    processor then it must be used on all processors that share the object for
3642    that argument.
3643 
3644    The user MUST specify either the local or global matrix dimensions
3645    (possibly both).
3646 
3647    The parallel matrix is partitioned across processors such that the
3648    first m0 rows belong to process 0, the next m1 rows belong to
3649    process 1, the next m2 rows belong to process 2 etc., where
3650    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3651    values corresponding to an [m x N] submatrix.
3652 
3653    The columns are logically partitioned with the n0 columns belonging
3654    to 0th partition, the next n1 columns belonging to the next
3655    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3656 
3657    The DIAGONAL portion of the local submatrix on any given processor
3658    is the submatrix corresponding to the rows and columns m,n
3659    corresponding to the given processor, i.e., the diagonal matrix on
3660    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3661    etc. The remaining portion of the local submatrix [m x (N-n)]
3662    constitute the OFF-DIAGONAL portion. The example below better
3663    illustrates this concept.
3664 
3665    For a square global matrix we define each processor's diagonal portion
3666    to be its local rows and the corresponding columns (a square submatrix);
3667    each processor's off-diagonal portion encompasses the remainder of the
3668    local matrix (a rectangular submatrix).
3669 
3670    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3671 
3672    When calling this routine with a single process communicator, a matrix of
3673    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3674    type of communicator, use the construction mechanism:
3675      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3676 
3677    By default, this format uses inodes (identical nodes) when possible.
3678    We search for consecutive rows with the same nonzero structure, thereby
3679    reusing matrix information to achieve increased efficiency.
3680 
3681    Options Database Keys:
3682 +  -mat_no_inode  - Do not use inodes
3683 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3684 -  -mat_aij_oneindex - Internally use indexing starting at 1
3685         rather than 0.  Note that when calling MatSetValues(),
3686         the user still MUST index entries starting at 0!
3687 
3688 
3689    Example usage:
3690 
3691    Consider the following 8x8 matrix with 34 non-zero values, that is
3692    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3693    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3694    as follows:
3695 
3696 .vb
3697             1  2  0  |  0  3  0  |  0  4
3698     Proc0   0  5  6  |  7  0  0  |  8  0
3699             9  0 10  | 11  0  0  | 12  0
3700     -------------------------------------
3701            13  0 14  | 15 16 17  |  0  0
3702     Proc1   0 18  0  | 19 20 21  |  0  0
3703             0  0  0  | 22 23  0  | 24  0
3704     -------------------------------------
3705     Proc2  25 26 27  |  0  0 28  | 29  0
3706            30  0  0  | 31 32 33  |  0 34
3707 .ve
3708 
3709    This can be represented as a collection of submatrices as:
3710 
3711 .vb
3712       A B C
3713       D E F
3714       G H I
3715 .ve
3716 
3717    Where the submatrices A,B,C are owned by proc0, D,E,F are
3718    owned by proc1, G,H,I are owned by proc2.
3719 
3720    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3721    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3722    The 'M','N' parameters are 8,8, and have the same values on all procs.
3723 
3724    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3725    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3726    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3727    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3728    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3729    matrix, and [DF] as another SeqAIJ matrix.
3730 
3731    When d_nz, o_nz parameters are specified, d_nz storage elements are
3732    allocated for every row of the local diagonal submatrix, and o_nz
3733    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3734    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3735    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3736    In this case, the values of d_nz,o_nz are:
3737 .vb
3738      proc0 : dnz = 2, o_nz = 2
3739      proc1 : dnz = 3, o_nz = 2
3740      proc2 : dnz = 1, o_nz = 4
3741 .ve
3742    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3743    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3744    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3745    34 values.
3746 
3747    When d_nnz, o_nnz parameters are specified, the storage is specified
3748    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3749    In the above case the values for d_nnz,o_nnz are:
3750 .vb
3751      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3752      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3753      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3754 .ve
3755    Here the space allocated is the sum of all the above values, i.e., 34, and
3756    hence pre-allocation is perfect.
3757 
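   A sketch of the corresponding call on proc0, using the per-row counts above, is
$     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
$     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
   with proc1 and proc2 making the same call using their own local sizes and count arrays.
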
3758    Level: intermediate
3759 
3760 .keywords: matrix, aij, compressed row, sparse, parallel
3761 
3762 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3763           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3764 @*/
3765 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3766 {
3767   PetscErrorCode ierr;
3768   PetscMPIInt    size;
3769 
3770   PetscFunctionBegin;
3771   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3772   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3773   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3774   if (size > 1) {
3775     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3776     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3777   } else {
3778     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3779     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3780   }
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 #undef __FUNCT__
3785 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
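/* Gives access to the pieces of a MATMPIAIJ matrix: the sequential diagonal block (Ad), the sequential
   off-diagonal block (Ao), and the map from Ao's local column numbers to global column numbers (colmap);
   any of the output arguments may be NULL if that piece is not needed */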
3786 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3787 {
3788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3789   PetscBool      flg;
3790   PetscErrorCode ierr;
3791 
3792   PetscFunctionBegin;
3793   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3794   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3795   if (Ad)     *Ad     = a->A;
3796   if (Ao)     *Ao     = a->B;
3797   if (colmap) *colmap = a->garray;
3798   PetscFunctionReturn(0);
3799 }
3800 
3801 #undef __FUNCT__
3802 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3803 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3804 {
3805   PetscErrorCode ierr;
3806   PetscInt       m,N,i,rstart,nnz,Ii;
3807   PetscInt       *indx;
3808   PetscScalar    *values;
3809 
3810   PetscFunctionBegin;
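  /* Stack the sequential inmat from each process, in rank order, into one parallel MPIAIJ matrix;
     n is this process's share of the columns of the result (or PETSC_DECIDE) */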
3811   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3812   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3813     PetscInt       *dnz,*onz,sum,bs,cbs;
3814 
3815     if (n == PETSC_DECIDE) {
3816       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3817     }
3818     /* Check sum(n) = N */
3819     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3820     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3821 
3822     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3823     rstart -= m;
3824 
3825     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3826     for (i=0; i<m; i++) {
3827       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3828       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3829       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3830     }
3831 
3832     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3833     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3834     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3835     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3836     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3837     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3838     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3839   }
3840 
3841   /* numeric phase */
3842   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3843   for (i=0; i<m; i++) {
3844     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3845     Ii   = i + rstart;
3846     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3847     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3848   }
3849   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3850   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatFileSplit"
3856 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3857 {
3858   PetscErrorCode    ierr;
3859   PetscMPIInt       rank;
3860   PetscInt          m,N,i,rstart,nnz;
3861   size_t            len;
3862   const PetscInt    *indx;
3863   PetscViewer       out;
3864   char              *name;
3865   Mat               B;
3866   const PetscScalar *values;
3867 
3868   PetscFunctionBegin;
3869   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3870   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3871   /* Should this be the type of the diagonal block of A? */
3872   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3873   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3874   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3875   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3876   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3877   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3878   for (i=0; i<m; i++) {
3879     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3880     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3881     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3882   }
3883   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885 
3886   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3887   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3888   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3889   sprintf(name,"%s.%d",outfile,rank);
3890   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3891   ierr = PetscFree(name);CHKERRQ(ierr);
3892   ierr = MatView(B,out);CHKERRQ(ierr);
3893   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3894   ierr = MatDestroy(&B);CHKERRQ(ierr);
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3899 #undef __FUNCT__
3900 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3901 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3902 {
3903   PetscErrorCode      ierr;
3904   Mat_Merge_SeqsToMPI *merge;
3905   PetscContainer      container;
3906 
3907   PetscFunctionBegin;
3908   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3909   if (container) {
3910     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3911     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3912     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3913     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3914     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3915     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3916     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3917     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3918     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3919     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3920     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3921     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3922     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3923     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3924     ierr = PetscFree(merge);CHKERRQ(ierr);
3925     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3926   }
3927   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3928   PetscFunctionReturn(0);
3929 }
3930 
3931 #include <../src/mat/utils/freespace.h>
3932 #include <petscbt.h>
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3936 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3937 {
3938   PetscErrorCode      ierr;
3939   MPI_Comm            comm;
3940   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3941   PetscMPIInt         size,rank,taga,*len_s;
3942   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3943   PetscInt            proc,m;
3944   PetscInt            **buf_ri,**buf_rj;
3945   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3946   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3947   MPI_Request         *s_waits,*r_waits;
3948   MPI_Status          *status;
3949   MatScalar           *aa=a->a;
3950   MatScalar           **abuf_r,*ba_i;
3951   Mat_Merge_SeqsToMPI *merge;
3952   PetscContainer      container;
3953 
3954   PetscFunctionBegin;
3955   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3956   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3957 
3958   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3959   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3960 
3961   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3962   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3963 
3964   bi     = merge->bi;
3965   bj     = merge->bj;
3966   buf_ri = merge->buf_ri;
3967   buf_rj = merge->buf_rj;
3968 
3969   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3970   owners = merge->rowmap->range;
3971   len_s  = merge->len_s;
3972 
3973   /* send and recv matrix values */
3974   /*-----------------------------*/
3975   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3976   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3977 
3978   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3979   for (proc=0,k=0; proc<size; proc++) {
3980     if (!len_s[proc]) continue;
3981     i    = owners[proc];
3982     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3983     k++;
3984   }
3985 
3986   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3987   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3988   ierr = PetscFree(status);CHKERRQ(ierr);
3989 
3990   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3991   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3992 
3993   /* insert mat values of mpimat */
3994   /*----------------------------*/
3995   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3996   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3997 
3998   for (k=0; k<merge->nrecv; k++) {
3999     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4000     nrows       = *(buf_ri_k[k]);
4001     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4002     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4003   }
4004 
4005   /* set values of ba */
4006   m = merge->rowmap->n;
4007   for (i=0; i<m; i++) {
4008     arow = owners[rank] + i;
4009     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4010     bnzi = bi[i+1] - bi[i];
4011     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4012 
4013     /* add local non-zero vals of this proc's seqmat into ba */
4014     anzi   = ai[arow+1] - ai[arow];
4015     aj     = a->j + ai[arow];
4016     aa     = a->a + ai[arow];
4017     nextaj = 0;
4018     for (j=0; nextaj<anzi; j++) {
4019       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4020         ba_i[j] += aa[nextaj++];
4021       }
4022     }
4023 
4024     /* add received vals into ba */
4025     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4026       /* i-th row */
4027       if (i == *nextrow[k]) {
4028         anzi   = *(nextai[k]+1) - *nextai[k];
4029         aj     = buf_rj[k] + *(nextai[k]);
4030         aa     = abuf_r[k] + *(nextai[k]);
4031         nextaj = 0;
4032         for (j=0; nextaj<anzi; j++) {
4033           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4034             ba_i[j] += aa[nextaj++];
4035           }
4036         }
4037         nextrow[k]++; nextai[k]++;
4038       }
4039     }
4040     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4041   }
4042   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4043   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4044 
4045   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4046   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4047   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4048   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4049   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4050   PetscFunctionReturn(0);
4051 }
4052 
4053 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4054 
4055 #undef __FUNCT__
4056 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4057 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4058 {
4059   PetscErrorCode      ierr;
4060   Mat                 B_mpi;
4061   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4062   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4063   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4064   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4065   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4066   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4067   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4068   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4069   MPI_Status          *status;
4070   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4071   PetscBT             lnkbt;
4072   Mat_Merge_SeqsToMPI *merge;
4073   PetscContainer      container;
4074 
4075   PetscFunctionBegin;
4076   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4077 
4078   /* make sure it is a PETSc comm */
4079   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4081   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4082 
4083   ierr = PetscNew(&merge);CHKERRQ(ierr);
4084   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4085 
4086   /* determine row ownership */
4087   /*---------------------------------------------------------*/
4088   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4089   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4090   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4091   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4092   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4093   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4094   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4095 
4096   m      = merge->rowmap->n;
4097   owners = merge->rowmap->range;
4098 
4099   /* determine the number of messages to send, their lengths */
4100   /*---------------------------------------------------------*/
4101   len_s = merge->len_s;
4102 
4103   len          = 0; /* length of buf_si[] */
4104   merge->nsend = 0;
4105   for (proc=0; proc<size; proc++) {
4106     len_si[proc] = 0;
4107     if (proc == rank) {
4108       len_s[proc] = 0;
4109     } else {
4110       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4111       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4112     }
4113     if (len_s[proc]) {
4114       merge->nsend++;
4115       nrows = 0;
4116       for (i=owners[proc]; i<owners[proc+1]; i++) {
4117         if (ai[i+1] > ai[i]) nrows++;
4118       }
4119       len_si[proc] = 2*(nrows+1);
4120       len         += len_si[proc];
4121     }
4122   }
4123 
4124   /* determine the number and length of messages to receive for ij-structure */
4125   /*-------------------------------------------------------------------------*/
4126   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4127   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4128 
4129   /* post the Irecv of j-structure */
4130   /*-------------------------------*/
4131   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4132   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4133 
4134   /* post the Isend of j-structure */
4135   /*--------------------------------*/
4136   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4137 
4138   for (proc=0, k=0; proc<size; proc++) {
4139     if (!len_s[proc]) continue;
4140     i    = owners[proc];
4141     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4142     k++;
4143   }
4144 
4145   /* receives and sends of j-structure are complete */
4146   /*------------------------------------------------*/
4147   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4148   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4149 
4150   /* send and recv i-structure */
4151   /*---------------------------*/
4152   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4153   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4154 
4155   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4156   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4157   for (proc=0,k=0; proc<size; proc++) {
4158     if (!len_s[proc]) continue;
4159     /* form outgoing message for i-structure:
4160          buf_si[0]:                 nrows to be sent
4161                [1:nrows]:           row index (global)
4162                [nrows+1:2*nrows+1]: i-structure index
4163     */
4164     /*-------------------------------------------*/
4165     nrows       = len_si[proc]/2 - 1;
4166     buf_si_i    = buf_si + nrows+1;
4167     buf_si[0]   = nrows;
4168     buf_si_i[0] = 0;
4169     nrows       = 0;
4170     for (i=owners[proc]; i<owners[proc+1]; i++) {
4171       anzi = ai[i+1] - ai[i];
4172       if (anzi) {
4173         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4174         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4175         nrows++;
4176       }
4177     }
4178     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4179     k++;
4180     buf_si += len_si[proc];
4181   }
4182 
4183   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4184   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4185 
4186   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4187   for (i=0; i<merge->nrecv; i++) {
4188     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4189   }
4190 
4191   ierr = PetscFree(len_si);CHKERRQ(ierr);
4192   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4193   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4194   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4195   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4196   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4197   ierr = PetscFree(status);CHKERRQ(ierr);
4198 
4199   /* compute a local seq matrix in each processor */
4200   /*----------------------------------------------*/
4201   /* allocate bi array and free space for accumulating nonzero column info */
4202   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4203   bi[0] = 0;
4204 
4205   /* create and initialize a linked list */
4206   nlnk = N+1;
4207   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4208 
4209   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4210   len  = ai[owners[rank+1]] - ai[owners[rank]];
4211   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4212 
4213   current_space = free_space;
4214 
4215   /* determine symbolic info for each local row */
4216   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4217 
4218   for (k=0; k<merge->nrecv; k++) {
4219     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4220     nrows       = *buf_ri_k[k];
4221     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4222     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4223   }
4224 
4225   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4226   len  = 0;
4227   for (i=0; i<m; i++) {
4228     bnzi = 0;
4229     /* add local non-zero cols of this proc's seqmat into lnk */
4230     arow  = owners[rank] + i;
4231     anzi  = ai[arow+1] - ai[arow];
4232     aj    = a->j + ai[arow];
4233     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4234     bnzi += nlnk;
4235     /* add received col data into lnk */
4236     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4237       if (i == *nextrow[k]) { /* i-th row */
4238         anzi  = *(nextai[k]+1) - *nextai[k];
4239         aj    = buf_rj[k] + *nextai[k];
4240         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4241         bnzi += nlnk;
4242         nextrow[k]++; nextai[k]++;
4243       }
4244     }
4245     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4246 
4247     /* if free space is not available, make more free space */
4248     if (current_space->local_remaining<bnzi) {
4249       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4250       nspacedouble++;
4251     }
4252     /* copy data into free space, then initialize lnk */
4253     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4254     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4255 
4256     current_space->array           += bnzi;
4257     current_space->local_used      += bnzi;
4258     current_space->local_remaining -= bnzi;
4259 
4260     bi[i+1] = bi[i] + bnzi;
4261   }
4262 
4263   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4264 
4265   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4266   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4267   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4268 
4269   /* create symbolic parallel matrix B_mpi */
4270   /*---------------------------------------*/
4271   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4272   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4273   if (n==PETSC_DECIDE) {
4274     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4275   } else {
4276     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4277   }
4278   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4279   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4280   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4281   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4282   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4283 
4284   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4285   B_mpi->assembled    = PETSC_FALSE;
4286   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4287   merge->bi           = bi;
4288   merge->bj           = bj;
4289   merge->buf_ri       = buf_ri;
4290   merge->buf_rj       = buf_rj;
4291   merge->coi          = NULL;
4292   merge->coj          = NULL;
4293   merge->owners_co    = NULL;
4294 
4295   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4296 
4297   /* attach the supporting struct to B_mpi for reuse */
4298   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4299   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4300   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4301   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4302   *mpimat = B_mpi;
4303 
4304   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4305   PetscFunctionReturn(0);
4306 }
4307 
4308 #undef __FUNCT__
4309 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4310 /*@C
4311       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4312                  matrices from each processor
4313 
4314     Collective on MPI_Comm
4315 
4316    Input Parameters:
4317 +    comm - the communicator the parallel matrix will live on
4318 .    seqmat - the input sequential matrix on each process
4319 .    m - number of local rows (or PETSC_DECIDE)
4320 .    n - number of local columns (or PETSC_DECIDE)
4321 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4322 
4323    Output Parameter:
4324 .    mpimat - the parallel matrix generated
4325 
4326     Level: advanced
4327 
4328    Notes:
4329      The dimensions of the sequential matrix in each processor MUST be the same.
4330      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4331      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
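
     Example usage (a minimal sketch; each process is assumed to have already assembled its
     own sequential matrix seqmat of the same global size, and error checking is omitted):
.vb
      Mat mpimat;

      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
      /* if only the values of seqmat change (same nonzero pattern), the sum can be redone in place */
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
      MatDestroy(&mpimat);
.ve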
4332 @*/
4333 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4334 {
4335   PetscErrorCode ierr;
4336   PetscMPIInt    size;
4337 
4338   PetscFunctionBegin;
4339   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4340   if (size == 1) {
4341     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4342     if (scall == MAT_INITIAL_MATRIX) {
4343       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4344     } else {
4345       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4346     }
4347     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4348     PetscFunctionReturn(0);
4349   }
4350   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4351   if (scall == MAT_INITIAL_MATRIX) {
4352     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4353   }
4354   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4355   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4356   PetscFunctionReturn(0);
4357 }
4358 
4359 #undef __FUNCT__
4360 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4361 /*@
4362      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4363           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4364           with MatGetSize()
4365 
4366     Not Collective
4367 
4368    Input Parameters:
4369 +    A - the matrix
4370 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4371 
4372    Output Parameter:
4373 .    A_loc - the local sequential matrix generated
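
   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix, and
   error checking is omitted):
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      /* ... use the sequential matrix A_loc ... */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc); /* refresh the values after A changes */
      MatDestroy(&A_loc);
.ve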
4374 
4375     Level: developer
4376 
4377 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4378 
4379 @*/
4380 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4381 {
4382   PetscErrorCode ierr;
4383   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4384   Mat_SeqAIJ     *mat,*a,*b;
4385   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4386   MatScalar      *aa,*ba,*cam;
4387   PetscScalar    *ca;
4388   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4389   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4390   PetscBool      match;
4391   MPI_Comm       comm;
4392   PetscMPIInt    size;
4393 
4394   PetscFunctionBegin;
4395   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4396   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4397   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4398   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4399   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4400 
4401   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4402   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4403   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4404   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4405   aa = a->a; ba = b->a;
4406   if (scall == MAT_INITIAL_MATRIX) {
4407     if (size == 1) {
4408       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4409       PetscFunctionReturn(0);
4410     }
4411 
4412     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4413     ci[0] = 0;
4414     for (i=0; i<am; i++) {
4415       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4416     }
4417     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4418     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4419     k    = 0;
4420     for (i=0; i<am; i++) {
4421       ncols_o = bi[i+1] - bi[i];
4422       ncols_d = ai[i+1] - ai[i];
4423       /* off-diagonal portion of A */
4424       for (jo=0; jo<ncols_o; jo++) {
4425         col = cmap[*bj];
4426         if (col >= cstart) break;
4427         cj[k]   = col; bj++;
4428         ca[k++] = *ba++;
4429       }
4430       /* diagonal portion of A */
4431       for (j=0; j<ncols_d; j++) {
4432         cj[k]   = cstart + *aj++;
4433         ca[k++] = *aa++;
4434       }
4435       /* off-diagonal portion of A */
4436       for (j=jo; j<ncols_o; j++) {
4437         cj[k]   = cmap[*bj++];
4438         ca[k++] = *ba++;
4439       }
4440     }
4441     /* put together the new matrix */
4442     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4443     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4444     /* Since these are PETSc arrays, change flags to free them as necessary. */
4445     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4446     mat->free_a  = PETSC_TRUE;
4447     mat->free_ij = PETSC_TRUE;
4448     mat->nonew   = 0;
4449   } else if (scall == MAT_REUSE_MATRIX) {
4450     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4451     ci = mat->i; cj = mat->j; cam = mat->a;
4452     for (i=0; i<am; i++) {
4453       /* off-diagonal portion of A */
4454       ncols_o = bi[i+1] - bi[i];
4455       for (jo=0; jo<ncols_o; jo++) {
4456         col = cmap[*bj];
4457         if (col >= cstart) break;
4458         *cam++ = *ba++; bj++;
4459       }
4460       /* diagonal portion of A */
4461       ncols_d = ai[i+1] - ai[i];
4462       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4463       /* off-diagonal portion of A */
4464       for (j=jo; j<ncols_o; j++) {
4465         *cam++ = *ba++; bj++;
4466       }
4467     }
4468   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4469   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4470   PetscFunctionReturn(0);
4471 }
4472 
4473 #undef __FUNCT__
4474 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4475 /*@C
4476      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4477 
4478     Not Collective
4479 
4480    Input Parameters:
4481 +    A - the matrix
4482 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4483 -    row, col - index sets of rows and columns to extract (or NULL)
4484 
4485    Output Parameter:
4486 .    A_loc - the local sequential matrix generated
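
   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix,
   passing NULL for row and col selects all local rows and the nonzero columns, and
   error checking is omitted):
.vb
      Mat A_loc;

      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
      /* ... use the condensed sequential matrix A_loc ... */
      MatDestroy(&A_loc);
.ve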
4487 
4488     Level: developer
4489 
4490 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4491 
4492 @*/
4493 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4494 {
4495   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4496   PetscErrorCode ierr;
4497   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4498   IS             isrowa,iscola;
4499   Mat            *aloc;
4500   PetscBool      match;
4501 
4502   PetscFunctionBegin;
4503   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4504   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4505   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4506   if (!row) {
4507     start = A->rmap->rstart; end = A->rmap->rend;
4508     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4509   } else {
4510     isrowa = *row;
4511   }
4512   if (!col) {
4513     start = A->cmap->rstart;
4514     cmap  = a->garray;
4515     nzA   = a->A->cmap->n;
4516     nzB   = a->B->cmap->n;
4517     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4518     ncols = 0;
4519     for (i=0; i<nzB; i++) {
4520       if (cmap[i] < start) idx[ncols++] = cmap[i];
4521       else break;
4522     }
4523     imark = i;
4524     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4525     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4526     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4527   } else {
4528     iscola = *col;
4529   }
4530   if (scall != MAT_INITIAL_MATRIX) {
4531     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4532     aloc[0] = *A_loc;
4533   }
4534   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4535   *A_loc = aloc[0];
4536   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4537   if (!row) {
4538     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4539   }
4540   if (!col) {
4541     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4542   }
4543   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4544   PetscFunctionReturn(0);
4545 }
4546 
4547 #undef __FUNCT__
4548 #define __FUNCT__ "MatGetBrowsOfAcols"
4549 /*@C
4550     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4551 
4552     Collective on Mat
4553 
4554    Input Parameters:
4555 +    A,B - the matrices in mpiaij format
4556 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4557 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4558 
4559    Output Parameter:
4560 +    rowb, colb - index sets of rows and columns of B to extract
4561 -    B_seq - the sequential matrix generated
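
   Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices
   with compatible layouts, and error checking is omitted):
.vb
      IS  rowb = NULL,colb = NULL;
      Mat B_seq;

      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
      /* ... later, after the values of B change but not its nonzero structure ... */
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
      ISDestroy(&rowb);
      ISDestroy(&colb);
      MatDestroy(&B_seq);
.ve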
4562 
4563     Level: developer
4564 
4565 @*/
4566 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4567 {
4568   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4569   PetscErrorCode ierr;
4570   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4571   IS             isrowb,iscolb;
4572   Mat            *bseq=NULL;
4573 
4574   PetscFunctionBegin;
4575   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4576     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4577   }
4578   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4579 
4580   if (scall == MAT_INITIAL_MATRIX) {
4581     start = A->cmap->rstart;
4582     cmap  = a->garray;
4583     nzA   = a->A->cmap->n;
4584     nzB   = a->B->cmap->n;
4585     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4586     ncols = 0;
4587     for (i=0; i<nzB; i++) {  /* row < local row index */
4588       if (cmap[i] < start) idx[ncols++] = cmap[i];
4589       else break;
4590     }
4591     imark = i;
4592     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4593     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4594     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4595     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4596   } else {
4597     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4598     isrowb  = *rowb; iscolb = *colb;
4599     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4600     bseq[0] = *B_seq;
4601   }
4602   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4603   *B_seq = bseq[0];
4604   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4605   if (!rowb) {
4606     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4607   } else {
4608     *rowb = isrowb;
4609   }
4610   if (!colb) {
4611     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4612   } else {
4613     *colb = iscolb;
4614   }
4615   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4616   PetscFunctionReturn(0);
4617 }
4618 
4619 #undef __FUNCT__
4620 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4621 /*
4622     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4623     of the OFF-DIAGONAL portion of local A
4624 
4625     Collective on Mat
4626 
4627    Input Parameters:
4628 +    A,B - the matrices in mpiaij format
4629 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4630 
4631    Output Parameter:
4632 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4633 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4634 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4635 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4636 
4637     Level: developer
4638 
4639 */
4640 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4641 {
4642   VecScatter_MPI_General *gen_to,*gen_from;
4643   PetscErrorCode         ierr;
4644   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4645   Mat_SeqAIJ             *b_oth;
4646   VecScatter             ctx =a->Mvctx;
4647   MPI_Comm               comm;
4648   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4649   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4650   PetscScalar            *rvalues,*svalues;
4651   MatScalar              *b_otha,*bufa,*bufA;
4652   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4653   MPI_Request            *rwaits = NULL,*swaits = NULL;
4654   MPI_Status             *sstatus,rstatus;
4655   PetscMPIInt            jj,size;
4656   PetscInt               *cols,sbs,rbs;
4657   PetscScalar            *vals;
4658 
4659   PetscFunctionBegin;
4660   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4662 
4663   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4664     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4665   }
4666   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4667   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4668 
4669   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4670   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4671   rvalues  = gen_from->values; /* holds the length of receiving row */
4672   svalues  = gen_to->values;   /* holds the length of sending row */
4673   nrecvs   = gen_from->n;
4674   nsends   = gen_to->n;
4675 
4676   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4677   srow    = gen_to->indices;    /* local row index to be sent */
4678   sstarts = gen_to->starts;
4679   sprocs  = gen_to->procs;
4680   sstatus = gen_to->sstatus;
4681   sbs     = gen_to->bs;
4682   rstarts = gen_from->starts;
4683   rprocs  = gen_from->procs;
4684   rbs     = gen_from->bs;
4685 
4686   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4687   if (scall == MAT_INITIAL_MATRIX) {
4688     /* i-array */
4689     /*---------*/
4690     /*  post receives */
4691     for (i=0; i<nrecvs; i++) {
4692       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4693       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4694       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4695     }
4696 
4697     /* pack the outgoing message */
4698     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4699 
4700     sstartsj[0] = 0;
4701     rstartsj[0] = 0;
4702     len         = 0; /* total length of j or a array to be sent */
4703     k           = 0;
4704     for (i=0; i<nsends; i++) {
4705       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4706       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4707       for (j=0; j<nrows; j++) {
4708         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4709         for (l=0; l<sbs; l++) {
4710           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4711 
4712           rowlen[j*sbs+l] = ncols;
4713 
4714           len += ncols;
4715           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4716         }
4717         k++;
4718       }
4719       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4720 
4721       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4722     }
4723     /* recvs and sends of i-array are completed */
4724     i = nrecvs;
4725     while (i--) {
4726       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4727     }
4728     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4729 
4730     /* allocate buffers for sending j and a arrays */
4731     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4732     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4733 
4734     /* create i-array of B_oth */
4735     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4736 
4737     b_othi[0] = 0;
4738     len       = 0; /* total length of j or a array to be received */
4739     k         = 0;
4740     for (i=0; i<nrecvs; i++) {
4741       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4742       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4743       for (j=0; j<nrows; j++) {
4744         b_othi[k+1] = b_othi[k] + rowlen[j];
4745         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4746         k++;
4747       }
4748       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4749     }
4750 
4751     /* allocate space for the j and a arrays of B_oth */
4752     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4753     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4754 
4755     /* j-array */
4756     /*---------*/
4757     /*  post receives of j-array */
4758     for (i=0; i<nrecvs; i++) {
4759       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4760       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4761     }
4762 
4763     /* pack the outgoing message j-array */
4764     k = 0;
4765     for (i=0; i<nsends; i++) {
4766       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4767       bufJ  = bufj+sstartsj[i];
4768       for (j=0; j<nrows; j++) {
4769         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4770         for (ll=0; ll<sbs; ll++) {
4771           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4772           for (l=0; l<ncols; l++) {
4773             *bufJ++ = cols[l];
4774           }
4775           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4776         }
4777       }
4778       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4779     }
4780 
4781     /* recvs and sends of j-array are completed */
4782     i = nrecvs;
4783     while (i--) {
4784       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4785     }
4786     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4787   } else if (scall == MAT_REUSE_MATRIX) {
4788     sstartsj = *startsj_s;
4789     rstartsj = *startsj_r;
4790     bufa     = *bufa_ptr;
4791     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4792     b_otha   = b_oth->a;
4793   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4794 
4795   /* a-array */
4796   /*---------*/
4797   /*  post receives of a-array */
4798   for (i=0; i<nrecvs; i++) {
4799     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4800     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4801   }
4802 
4803   /* pack the outgoing message a-array */
4804   k = 0;
4805   for (i=0; i<nsends; i++) {
4806     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4807     bufA  = bufa+sstartsj[i];
4808     for (j=0; j<nrows; j++) {
4809       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4810       for (ll=0; ll<sbs; ll++) {
4811         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4812         for (l=0; l<ncols; l++) {
4813           *bufA++ = vals[l];
4814         }
4815         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4816       }
4817     }
4818     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4819   }
4820   /* recvs and sends of a-array are completed */
4821   i = nrecvs;
4822   while (i--) {
4823     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4824   }
4825   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4826   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4827 
4828   if (scall == MAT_INITIAL_MATRIX) {
4829     /* put together the new matrix */
4830     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4831 
4832     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4833     /* Since these are PETSc arrays, change flags to free them as necessary. */
4834     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4835     b_oth->free_a  = PETSC_TRUE;
4836     b_oth->free_ij = PETSC_TRUE;
4837     b_oth->nonew   = 0;
4838 
4839     ierr = PetscFree(bufj);CHKERRQ(ierr);
4840     if (!startsj_s || !bufa_ptr) {
4841       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4842       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller in this case */
4843     } else {
4844       *startsj_s = sstartsj;
4845       *startsj_r = rstartsj;
4846       *bufa_ptr  = bufa;
4847     }
4848   }
4849   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4850   PetscFunctionReturn(0);
4851 }
4852 
4853 #undef __FUNCT__
4854 #define __FUNCT__ "MatGetCommunicationStructs"
4855 /*@C
4856   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4857 
4858   Not Collective
4859 
4860   Input Parameters:
4861 . A - The matrix in mpiaij format
4862 
4863   Output Parameter:
4864 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4865 . colmap - A map from global column index to local index into lvec
4866 - multScatter - A scatter from the argument of a matrix-vector product to lvec
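
  Example usage (a minimal sketch assuming a build without PETSC_USE_CTABLE, so colmap is a
  PetscInt array; the returned objects are owned by the matrix and must not be destroyed):
.vb
      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;

      MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve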
4867 
4868   Level: developer
4869 
4870 @*/
4871 #if defined(PETSC_USE_CTABLE)
4872 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4873 #else
4874 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4875 #endif
4876 {
4877   Mat_MPIAIJ *a;
4878 
4879   PetscFunctionBegin;
4880   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4881   PetscValidPointer(lvec, 2);
4882   PetscValidPointer(colmap, 3);
4883   PetscValidPointer(multScatter, 4);
4884   a = (Mat_MPIAIJ*) A->data;
4885   if (lvec) *lvec = a->lvec;
4886   if (colmap) *colmap = a->colmap;
4887   if (multScatter) *multScatter = a->Mvctx;
4888   PetscFunctionReturn(0);
4889 }
4890 
4891 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4892 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4893 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4894 #if defined(PETSC_HAVE_ELEMENTAL)
4895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4896 #endif
4897 #if defined(PETSC_HAVE_HYPRE)
4898 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
4899 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
4900 #endif
4901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4905 /*
4906     Computes (B'*A')' since computing B*A directly is untenable
4907 
4908                n                       p                          p
4909         (              )       (              )         (                  )
4910       m (      A       )  *  n (       B      )   =   m (         C        )
4911         (              )       (              )         (                  )
4912 
4913 */
4914 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4915 {
4916   PetscErrorCode ierr;
4917   Mat            At,Bt,Ct;
4918 
4919   PetscFunctionBegin;
4920   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4921   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4922   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4923   ierr = MatDestroy(&At);CHKERRQ(ierr);
4924   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4925   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4926   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4927   PetscFunctionReturn(0);
4928 }
4929 
4930 #undef __FUNCT__
4931 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4932 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4933 {
4934   PetscErrorCode ierr;
4935   PetscInt       m=A->rmap->n,n=B->cmap->n;
4936   Mat            Cmat;
4937 
4938   PetscFunctionBegin;
4939   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4940   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4941   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4942   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4943   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4944   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4945   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4946   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4947 
4948   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4949 
4950   *C = Cmat;
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 /* ----------------------------------------------------------------*/
4955 #undef __FUNCT__
4956 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4957 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4958 {
4959   PetscErrorCode ierr;
4960 
4961   PetscFunctionBegin;
4962   if (scall == MAT_INITIAL_MATRIX) {
4963     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4964     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4965     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4966   }
4967   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4968   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4969   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4970   PetscFunctionReturn(0);
4971 }
4972 
4973 /*MC
4974    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4975 
4976    Options Database Keys:
4977 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4978 
4979   Level: beginner
4980 
4981 .seealso: MatCreateAIJ()
4982 M*/
4983 
4984 #undef __FUNCT__
4985 #define __FUNCT__ "MatCreate_MPIAIJ"
4986 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4987 {
4988   Mat_MPIAIJ     *b;
4989   PetscErrorCode ierr;
4990   PetscMPIInt    size;
4991 
4992   PetscFunctionBegin;
4993   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4994 
4995   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4996   B->data       = (void*)b;
4997   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4998   B->assembled  = PETSC_FALSE;
4999   B->insertmode = NOT_SET_VALUES;
5000   b->size       = size;
5001 
5002   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5003 
5004   /* build cache for off array entries formed */
5005   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5006 
5007   b->donotstash  = PETSC_FALSE;
5008   b->colmap      = 0;
5009   b->garray      = 0;
5010   b->roworiented = PETSC_TRUE;
5011 
5012   /* stuff used for matrix vector multiply */
5013   b->lvec  = NULL;
5014   b->Mvctx = NULL;
5015 
5016   /* stuff for MatGetRow() */
5017   b->rowindices   = 0;
5018   b->rowvalues    = 0;
5019   b->getrowactive = PETSC_FALSE;
5020 
5021   /* flexible pointer used in CUSP/CUSPARSE classes */
5022   b->spptr = NULL;
5023 
5024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5034 #if defined(PETSC_HAVE_ELEMENTAL)
5035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5036 #endif
5037 #if defined(PETSC_HAVE_HYPRE)
5038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5039 #endif
5040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5044 #if defined(PETSC_HAVE_HYPRE)
5045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5046 #endif
5047   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5048   PetscFunctionReturn(0);
5049 }
5050 
5051 #undef __FUNCT__
5052 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5053 /*@C
5054      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5055          and "off-diagonal" part of the matrix in CSR format.
5056 
5057    Collective on MPI_Comm
5058 
5059    Input Parameters:
5060 +  comm - MPI communicator
5061 .  m - number of local rows (Cannot be PETSC_DECIDE)
5062 .  n - This value should be the same as the local size used in creating the
5063        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5064        calculated if N is given). For square matrices n is almost always m.
5065 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5066 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5067 .   i - row indices for "diagonal" portion of matrix
5068 .   j - column indices for "diagonal" portion of matrix (local indices into the diagonal block)
5069 .   a - matrix values for "diagonal" portion of matrix
5070 .   oi - row indices for "off-diagonal" portion of matrix
5071 .   oj - column indices for "off-diagonal" portion of matrix (global column indices)
5072 -   oa - matrix values for "off-diagonal" portion of matrix
5073 
5074    Output Parameter:
5075 .   mat - the matrix
5076 
5077    Level: advanced
5078 
5079    Notes:
5080        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5081        must free the arrays once the matrix has been destroyed and not before.
5082 
5083        The i and j indices are 0 based
5084 
5085        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5086 
5087        This sets local rows and cannot be used to set off-processor values.
5088 
5089        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5090        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5091        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5092        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5093        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5094        communication if it is known that only local entries will be set.
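
       For illustration only, a minimal sketch that assembles the 2x2 matrix [2 -1; -1 2] on
       exactly two processes (one row each); the off-diagonal column indices are global, and,
       as noted above, MatSetValues() is normally preferable:
.vb
      PetscMPIInt rank;
      PetscInt    i[2]  = {0,1},j[1],oi[2] = {0,1},oj[1];
      PetscScalar a[1]  = {2.0},oa[1] = {-1.0};
      Mat         A;

      MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
      j[0]  = 0;            /* local column index inside this process's diagonal block */
      oj[0] = rank ? 0 : 1; /* GLOBAL column index of the off-diagonal entry */
      MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);
      /* ... use A; the arrays above must stay valid until after MatDestroy() ... */
      MatDestroy(&A);
.ve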
5095 
5096 .keywords: matrix, aij, compressed row, sparse, parallel
5097 
5098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5099           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5100 @*/
5101 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5102 {
5103   PetscErrorCode ierr;
5104   Mat_MPIAIJ     *maij;
5105 
5106   PetscFunctionBegin;
5107   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5108   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5109   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5110   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5111   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5112   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5113   maij = (Mat_MPIAIJ*) (*mat)->data;
5114 
5115   (*mat)->preallocated = PETSC_TRUE;
5116 
5117   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5118   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5119 
5120   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5121   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5122 
5123   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5124   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5125   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5126   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5127 
5128   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5129   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5130   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5131   PetscFunctionReturn(0);
5132 }
5133 
5134 /*
5135     Special version for direct calls from Fortran
5136 */
5137 #include <petsc/private/fortranimpl.h>
5138 
5139 /* Redefine the error handling macros so that they can be used in a void function: errors abort rather than return an error code */
5140 #undef CHKERRQ
5141 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5142 #undef SETERRQ2
5143 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5144 #undef SETERRQ3
5145 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5146 #undef SETERRQ
5147 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5148 
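/* Select the C symbol name that matches the Fortran compiler's name-mangling convention
   (all upper case, no trailing underscore, or a trailing underscore) so that Fortran code
   can call this routine directly */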
5149 #undef __FUNCT__
5150 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5151 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5152 #define __FUNCT__ "MATSETVALUESMPIAIJ"
5153 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5154 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5155 #define __FUNCT__ "matsetvaluesmpiaij"
5156 #else
5157 #define __FUNCT__ "matsetvaluesmpiaij_"
5158 #endif
5159 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5160 {
5161   Mat            mat  = *mmat;
5162   PetscInt       m    = *mm, n = *mn;
5163   InsertMode     addv = *maddv;
5164   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5165   PetscScalar    value;
5166   PetscErrorCode ierr;
5167 
5168   MatCheckPreallocated(mat,1);
5169   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5170 
5171 #if defined(PETSC_USE_DEBUG)
5172   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5173 #endif
5174   {
5175     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5176     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5177     PetscBool roworiented = aij->roworiented;
5178 
5179     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5180     Mat        A                 = aij->A;
5181     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5182     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5183     MatScalar  *aa               = a->a;
5184     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5185     Mat        B                 = aij->B;
5186     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5187     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5188     MatScalar  *ba               = b->a;
5189 
5190     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5191     PetscInt  nonew = a->nonew;
5192     MatScalar *ap1,*ap2;
5193 
5194     PetscFunctionBegin;
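    /* Same logic as MatSetValues_MPIAIJ(): entries in locally owned rows go into either the "diagonal"
       block A or the "off-diagonal" block B (disassembling the matrix if a new off-diagonal column is
       introduced after a previous assembly); entries in rows owned by other processes are stashed for
       communication during assembly, unless donotstash is set */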
5195     for (i=0; i<m; i++) {
5196       if (im[i] < 0) continue;
5197 #if defined(PETSC_USE_DEBUG)
5198       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5199 #endif
5200       if (im[i] >= rstart && im[i] < rend) {
5201         row      = im[i] - rstart;
5202         lastcol1 = -1;
5203         rp1      = aj + ai[row];
5204         ap1      = aa + ai[row];
5205         rmax1    = aimax[row];
5206         nrow1    = ailen[row];
5207         low1     = 0;
5208         high1    = nrow1;
5209         lastcol2 = -1;
5210         rp2      = bj + bi[row];
5211         ap2      = ba + bi[row];
5212         rmax2    = bimax[row];
5213         nrow2    = bilen[row];
5214         low2     = 0;
5215         high2    = nrow2;
5216 
5217         for (j=0; j<n; j++) {
5218           if (roworiented) value = v[i*n+j];
5219           else value = v[i+j*m];
5220           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5221           if (in[j] >= cstart && in[j] < cend) {
5222             col = in[j] - cstart;
5223             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5224           } else if (in[j] < 0) continue;
5225 #if defined(PETSC_USE_DEBUG)
5226           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5227 #endif
5228           else {
5229             if (mat->was_assembled) {
5230               if (!aij->colmap) {
5231                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5232               }
5233 #if defined(PETSC_USE_CTABLE)
5234               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5235               col--;
5236 #else
5237               col = aij->colmap[in[j]] - 1;
5238 #endif
5239               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5240                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5241                 col  =  in[j];
5242                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5243                 B     = aij->B;
5244                 b     = (Mat_SeqAIJ*)B->data;
5245                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5246                 rp2   = bj + bi[row];
5247                 ap2   = ba + bi[row];
5248                 rmax2 = bimax[row];
5249                 nrow2 = bilen[row];
5250                 low2  = 0;
5251                 high2 = nrow2;
5252                 bm    = aij->B->rmap->n;
5253                 ba    = b->a;
5254               }
5255             } else col = in[j];
5256             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5257           }
5258         }
5259       } else if (!aij->donotstash) {
5260         if (roworiented) {
5261           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5262         } else {
5263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5264         }
5265       }
5266     }
5267   }
5268   PetscFunctionReturnVoid();
5269 }
5270 
5271