/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 26bda2c46e6cf0fc720fe1c3d1cb20e6fede842e) */

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
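
   For example, the following sketch (the communicator comm, the sizes m and n, and the
   nonzero estimates nz, dnz, and onz are illustrative) preallocates correctly regardless
   of how many processes the communicator contains; the preallocation call that does not
   match the constructed type is simply ignored:
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,nz,NULL);           /* used when comm has one process */
   MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL); /* used when comm has multiple processes */
.ve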

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatSetLateBlockSizes_MPIAIJ"
PetscErrorCode MatSetLateBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (rbs && !cbs) {
    ierr = MatSetBlockSize(mat->A,rbs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(mat->B,rbs);CHKERRQ(ierr);
  } else if (rbs && cbs) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(mat->B,rbs);CHKERRQ(ierr);
  } else SETERRQ2(PetscObjectComm((PetscObject)M),PETSC_ERR_ARG_WRONG,"Cannot set late block sizes %D %D",rbs,cbs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

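/*
   Computes the requested norm of every global column: each process accumulates the
   contributions of its diagonal block (columns offset by cmap->rstart) and of its
   off-diagonal block (columns translated through garray) into a work array of the
   full column dimension, which is then combined across all processes with a max
   (NORM_INFINITY) or sum (NORM_1, NORM_2) reduction.
*/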
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
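
    For MAT_INITIAL_MATRIX, process 0 sends every other process three messages
    (row lengths, column indices, numerical values); for MAT_REUSE_MATRIX only
    the numerical values are resent.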
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
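/*
   For example (with hypothetical sizes): if garray = {5,9}, then without PETSC_USE_CTABLE
   the routine below sets colmap[5] = 1 and colmap[9] = 2 and leaves every other entry 0,
   so a lookup computes local = colmap[global] - 1 and a result of -1 means the global
   column is not present in the off-diagonal block.
*/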
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

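/*
   Inserts or adds value at (row,col) of the diagonal block A; row and col are local
   indices while orow/ocol are the global indices used only in error messages. The
   macro narrows the search range with a coarse binary search, finishes with a linear
   scan, and on a genuinely new nonzero calls MatSeqXAIJReallocateAIJ() and shifts the
   remainder of the row. It relies on rp1/ap1/nrow1/low1/high1/... being set up by the caller.
*/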
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

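/* Same search-and-insert logic as the A variant above, but applied to the off-diagonal block B (rp2/ap2/nrow2/... state). */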
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

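/*
   Replaces the numerical values of one locally owned row. v[] must hold the entire row
   with columns sorted by increasing global index, i.e. [off-diagonal entries left of the
   diagonal block | diagonal-block entries | off-diagonal entries right of the diagonal block].
*/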
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

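/*
   Entries destined for locally owned rows go directly into the diagonal block A or,
   after translating the global column through colmap, into the off-diagonal block B
   (disassembling the matrix first if a brand-new off-diagonal column appears after
   assembly); entries for off-process rows are stashed and communicated during
   MatAssemblyBegin/End().
*/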
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

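/*
   Translates the requested global rows to locally owned rows with a star forest, zeros
   rows and columns of the diagonal block via MatZeroRowsColumns() on the sequential A,
   then scatters a 0/1 mask of the zeroed rows into ghost space so the matching columns
   of the off-diagonal block B can be zeroed as well (updating b with the corresponding
   entries of x when both are provided).
*/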
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

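/*
   Computes y = A x as y = Ad x + Bo xghost: the scatter of the needed ghost values of x
   into lvec is started first, the diagonal-block product overlaps that communication,
   and the off-diagonal block is applied with multadd once the scatter completes.
*/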
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

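/*
   Computes y = A^T x as y = Ad^T x plus a reverse scatter-add of Bo^T x from the ghost
   work vector lvec into y; the ordering of the local product relative to the scatter
   depends on whether the scatter's begin/end are merged (see VecScatterGetMerged()).
*/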
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1180 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1181 {
1182   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1183   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1184   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1185   PetscErrorCode ierr;
1186   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1187   int            fd;
1188   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1189   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1190   PetscScalar    *column_values;
1191   PetscInt       message_count,flowcontrolcount;
1192   FILE           *file;
1193 
1194   PetscFunctionBegin;
1195   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1197   nz   = A->nz + B->nz;
1198   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1199   if (!rank) {
1200     header[0] = MAT_FILE_CLASSID;
1201     header[1] = mat->rmap->N;
1202     header[2] = mat->cmap->N;
1203 
1204     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1205     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     /* get largest number of rows any processor has */
1207     rlen  = mat->rmap->n;
1208     range = mat->rmap->range;
1209     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1210   } else {
1211     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     rlen = mat->rmap->n;
1213   }
1214 
1215   /* load up the local row counts */
1216   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1217   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1218 
1219   /* store the row lengths to the file */
1220   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1221   if (!rank) {
1222     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1225       rlen = range[i+1] - range[i];
1226       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     }
1229     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1230   } else {
1231     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1232     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1234   }
1235   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1236 
1237   /* load up the local column indices */
1238   nzmax = nz; /* th processor needs space a largest processor needs */
1239   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1241   cnt   = 0;
1242   for (i=0; i<mat->rmap->n; i++) {
1243     for (j=B->i[i]; j<B->i[i+1]; j++) {
1244       if ((col = garray[B->j[j]]) > cstart) break;
1245       column_indices[cnt++] = col;
1246     }
1247     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1248     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1249   }
1250   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251 
1252   /* store the column indices to the file */
1253   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1254   if (!rank) {
1255     MPI_Status status;
1256     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1257     for (i=1; i<size; i++) {
1258       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1259       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1260       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1261       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1267     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1270   }
1271   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1272 
1273   /* load up the local column values */
1274   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1275   cnt  = 0;
1276   for (i=0; i<mat->rmap->n; i++) {
1277     for (j=B->i[i]; j<B->i[i+1]; j++) {
1278       if (garray[B->j[j]] > cstart) break;
1279       column_values[cnt++] = B->a[j];
1280     }
1281     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1282     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1283   }
1284   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1285 
1286   /* store the column values to the file */
1287   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1288   if (!rank) {
1289     MPI_Status status;
1290     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1291     for (i=1; i<size; i++) {
1292       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1293       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1294       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1295       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1297     }
1298     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1299   } else {
1300     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1301     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1304   }
1305   ierr = PetscFree(column_values);CHKERRQ(ierr);
1306 
1307   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1308   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1309   PetscFunctionReturn(0);
1310 }
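
/*
   A minimal usage sketch (not part of this file's code paths): saving an already assembled MPIAIJ
   matrix A with a binary viewer, which in parallel dispatches to MatView_MPIAIJ_Binary() above, and
   reading it back into B with MatLoad().  The "-matload_block_size" hint written at the end of that
   routine is picked up at load time.  The file name "matrix.dat" is illustrative and error checking
   is omitted.

     PetscViewer viewer;
     Mat         B;

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);
     PetscViewerDestroy(&viewer);

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetType(B,MATAIJ);
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatLoad(B,viewer);
     PetscViewerDestroy(&viewer);
*/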
1311 
1312 #include <petscdraw.h>
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1315 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1316 {
1317   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1318   PetscErrorCode    ierr;
1319   PetscMPIInt       rank = aij->rank,size = aij->size;
1320   PetscBool         isdraw,iascii,isbinary;
1321   PetscViewer       sviewer;
1322   PetscViewerFormat format;
1323 
1324   PetscFunctionBegin;
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1327   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1328   if (iascii) {
1329     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1330     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1331       MatInfo   info;
1332       PetscBool inodes;
1333 
1334       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1335       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1336       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1337       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1338       if (!inodes) {
1339         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1340                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1341       } else {
1342         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1343                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1344       }
1345       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1346       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1347       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1348       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1349       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1352       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1353       PetscFunctionReturn(0);
1354     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1355       PetscInt inodecount,inodelimit,*inodes;
1356       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1357       if (inodes) {
1358         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1359       } else {
1360         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1361       }
1362       PetscFunctionReturn(0);
1363     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1364       PetscFunctionReturn(0);
1365     }
1366   } else if (isbinary) {
1367     if (size == 1) {
1368       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1369       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1370     } else {
1371       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1372     }
1373     PetscFunctionReturn(0);
1374   } else if (isdraw) {
1375     PetscDraw draw;
1376     PetscBool isnull;
1377     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1378     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1379     if (isnull) PetscFunctionReturn(0);
1380   }
1381 
1382   {
1383     /* assemble the entire matrix onto first processor. */
1384     Mat        A;
1385     Mat_SeqAIJ *Aloc;
1386     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1387     MatScalar  *a;
1388 
1389     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1390     if (!rank) {
1391       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1392     } else {
1393       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1394     }
1395     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1396     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1397     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1398     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1399     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1400 
1401     /* copy over the A part */
1402     Aloc = (Mat_SeqAIJ*)aij->A->data;
1403     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1404     row  = mat->rmap->rstart;
1405     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1406     for (i=0; i<m; i++) {
1407       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1408       row++;
1409       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1410     }
1411     aj = Aloc->j;
1412     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1413 
1414     /* copy over the B part */
1415     Aloc = (Mat_SeqAIJ*)aij->B->data;
1416     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1419     ct   = cols;
1420     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1421     for (i=0; i<m; i++) {
1422       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1423       row++;
1424       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1425     }
1426     ierr = PetscFree(ct);CHKERRQ(ierr);
1427     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429     /*
1430        Every process has to participate in the viewing call, since the graphics waits are
1431        synchronized across all processes that share the PetscDraw object
1432     */
1433     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1434     if (!rank) {
1435       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1436       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1437     }
1438     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1439     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1440     ierr = MatDestroy(&A);CHKERRQ(ierr);
1441   }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatView_MPIAIJ"
1447 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1448 {
1449   PetscErrorCode ierr;
1450   PetscBool      iascii,isdraw,issocket,isbinary;
1451 
1452   PetscFunctionBegin;
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1457   if (iascii || isdraw || isbinary || issocket) {
1458     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1459   }
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatSOR_MPIAIJ"
1465 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1466 {
1467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1468   PetscErrorCode ierr;
1469   Vec            bb1 = 0;
1470   PetscBool      hasop;
1471 
1472   PetscFunctionBegin;
1473   if (flag == SOR_APPLY_UPPER) {
1474     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1475     PetscFunctionReturn(0);
1476   }
1477 
1478   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1479     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1480   }
1481 
1482   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1483     if (flag & SOR_ZERO_INITIAL_GUESS) {
1484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1485       its--;
1486     }
1487 
1488     while (its--) {
1489       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491 
1492       /* update rhs: bb1 = bb - B*x */
1493       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1494       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1495 
1496       /* local sweep */
1497       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1498     }
1499   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1500     if (flag & SOR_ZERO_INITIAL_GUESS) {
1501       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1502       its--;
1503     }
1504     while (its--) {
1505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507 
1508       /* update rhs: bb1 = bb - B*x */
1509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1511 
1512       /* local sweep */
1513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1514     }
1515   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520     while (its--) {
1521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523 
1524       /* update rhs: bb1 = bb - B*x */
1525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1527 
1528       /* local sweep */
1529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1530     }
1531   } else if (flag & SOR_EISENSTAT) {
1532     Vec xx1;
1533 
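    /*
       Eisenstat trick, assembled from the local pieces below:
         1) a local backward sweep of the diagonal block with a zero initial guess produces xx;
         2) the modified right-hand side bb1 = bb + ((omega-2)/omega)*D*xx + B*xhat is formed, where D is
            the (block) diagonal of the matrix and xhat holds the ghost values of xx gathered into lvec;
         3) a local forward sweep with right-hand side bb1 produces xx1, which is added to xx.
    */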
1534     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1535     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1536 
1537     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539     if (!mat->diag) {
1540       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1541       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1542     }
1543     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1544     if (hasop) {
1545       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1546     } else {
1547       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1548     }
1549     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1550 
1551     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1552 
1553     /* local sweep */
1554     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1555     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1556     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1557   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1558 
1559   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1560 
1561   matin->factorerrortype = mat->A->factorerrortype;
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatPermute_MPIAIJ"
1567 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1568 {
1569   Mat            aA,aB,Aperm;
1570   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1571   PetscScalar    *aa,*ba;
1572   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1573   PetscSF        rowsf,sf;
1574   IS             parcolp = NULL;
1575   PetscBool      done;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
1579   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1580   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1581   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1582   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1583 
1584   /* Invert row permutation to find out where my rows should go */
1585   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1586   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1587   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1588   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1589   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1590   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1591 
1592   /* Invert column permutation to find out where my columns should go */
1593   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1594   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1595   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1596   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1597   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1598   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1599   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1600 
1601   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1602   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1603   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1604 
1605   /* Find out where my gcols should go */
1606   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1607   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1608   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1609   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1610   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1611   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1612   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1614 
1615   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1616   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1617   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1618   for (i=0; i<m; i++) {
1619     PetscInt row = rdest[i],rowner;
1620     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1621     for (j=ai[i]; j<ai[i+1]; j++) {
1622       PetscInt cowner,col = cdest[aj[j]];
1623       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1624       if (rowner == cowner) dnnz[i]++;
1625       else onnz[i]++;
1626     }
1627     for (j=bi[i]; j<bi[i+1]; j++) {
1628       PetscInt cowner,col = gcdest[bj[j]];
1629       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1630       if (rowner == cowner) dnnz[i]++;
1631       else onnz[i]++;
1632     }
1633   }
1634   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1635   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1636   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1637   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1639 
1640   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1641   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1642   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1643   for (i=0; i<m; i++) {
1644     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1645     PetscInt j0,rowlen;
1646     rowlen = ai[i+1] - ai[i];
1647     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than m (the size of the repurposed buffers), so insert in batches of at most m */
1648       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1649       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1650     }
1651     rowlen = bi[i+1] - bi[i];
1652     for (j0=j=0; j<rowlen; j0=j) {
1653       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1654       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1655     }
1656   }
1657   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1658   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1660   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1661   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1662   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1663   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1664   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1665   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1666   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1667   *B = Aperm;
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1673 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1674 {
1675   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1676   PetscErrorCode ierr;
1677 
1678   PetscFunctionBegin;
1679   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1680   if (ghosts) *ghosts = aij->garray;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1686 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1687 {
1688   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1689   Mat            A    = mat->A,B = mat->B;
1690   PetscErrorCode ierr;
1691   PetscReal      isend[5],irecv[5];
1692 
1693   PetscFunctionBegin;
1694   info->block_size = 1.0;
1695   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1696 
1697   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1698   isend[3] = info->memory;  isend[4] = info->mallocs;
1699 
1700   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1701 
1702   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1703   isend[3] += info->memory;  isend[4] += info->mallocs;
1704   if (flag == MAT_LOCAL) {
1705     info->nz_used      = isend[0];
1706     info->nz_allocated = isend[1];
1707     info->nz_unneeded  = isend[2];
1708     info->memory       = isend[3];
1709     info->mallocs      = isend[4];
1710   } else if (flag == MAT_GLOBAL_MAX) {
1711     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1712 
1713     info->nz_used      = irecv[0];
1714     info->nz_allocated = irecv[1];
1715     info->nz_unneeded  = irecv[2];
1716     info->memory       = irecv[3];
1717     info->mallocs      = irecv[4];
1718   } else if (flag == MAT_GLOBAL_SUM) {
1719     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1720 
1721     info->nz_used      = irecv[0];
1722     info->nz_allocated = irecv[1];
1723     info->nz_unneeded  = irecv[2];
1724     info->memory       = irecv[3];
1725     info->mallocs      = irecv[4];
1726   }
1727   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1728   info->fill_ratio_needed = 0;
1729   info->factor_mallocs    = 0;
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetOption_MPIAIJ"
1735 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1736 {
1737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   switch (op) {
1742   case MAT_NEW_NONZERO_LOCATIONS:
1743   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1744   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1745   case MAT_KEEP_NONZERO_PATTERN:
1746   case MAT_NEW_NONZERO_LOCATION_ERR:
1747   case MAT_USE_INODES:
1748   case MAT_IGNORE_ZERO_ENTRIES:
1749     MatCheckPreallocated(A,1);
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1752     break;
1753   case MAT_ROW_ORIENTED:
1754     MatCheckPreallocated(A,1);
1755     a->roworiented = flg;
1756 
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_NEW_DIAGONALS:
1761     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1762     break;
1763   case MAT_IGNORE_OFF_PROC_ENTRIES:
1764     a->donotstash = flg;
1765     break;
1766   case MAT_SPD:
1767     A->spd_set = PETSC_TRUE;
1768     A->spd     = flg;
1769     if (flg) {
1770       A->symmetric                  = PETSC_TRUE;
1771       A->structurally_symmetric     = PETSC_TRUE;
1772       A->symmetric_set              = PETSC_TRUE;
1773       A->structurally_symmetric_set = PETSC_TRUE;
1774     }
1775     break;
1776   case MAT_SYMMETRIC:
1777     MatCheckPreallocated(A,1);
1778     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1779     break;
1780   case MAT_STRUCTURALLY_SYMMETRIC:
1781     MatCheckPreallocated(A,1);
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   case MAT_HERMITIAN:
1785     MatCheckPreallocated(A,1);
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   case MAT_SYMMETRY_ETERNAL:
1789     MatCheckPreallocated(A,1);
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     break;
1792   default:
1793     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1794   }
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 #undef __FUNCT__
1799 #define __FUNCT__ "MatGetRow_MPIAIJ"
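/*
   Returns the requested (locally owned, global numbering) row by merging the corresponding row of the
   diagonal block A, whose column indices are shifted by cstart into global numbering, with the row of
   the off-diagonal block B, whose column indices are mapped to global numbering through garray; the
   merged entries are returned with globally increasing column indices.
*/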
1800 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1801 {
1802   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1803   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1804   PetscErrorCode ierr;
1805   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1806   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1807   PetscInt       *cmap,*idx_p;
1808 
1809   PetscFunctionBegin;
1810   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1811   mat->getrowactive = PETSC_TRUE;
1812 
1813   if (!mat->rowvalues && (idx || v)) {
1814     /*
1815         allocate enough space to hold information from the longest row.
1816     */
1817     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1818     PetscInt   max = 1,tmp;
1819     for (i=0; i<matin->rmap->n; i++) {
1820       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1821       if (max < tmp) max = tmp;
1822     }
1823     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1824   }
1825 
1826   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1827   lrow = row - rstart;
1828 
1829   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1830   if (!v)   {pvA = 0; pvB = 0;}
1831   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1832   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1833   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1834   nztot = nzA + nzB;
1835 
1836   cmap = mat->garray;
1837   if (v  || idx) {
1838     if (nztot) {
1839       /* Sort by increasing column numbers, assuming A and B already sorted */
1840       PetscInt imark = -1;
1841       if (v) {
1842         *v = v_p = mat->rowvalues;
1843         for (i=0; i<nzB; i++) {
1844           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1845           else break;
1846         }
1847         imark = i;
1848         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1849         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1850       }
1851       if (idx) {
1852         *idx = idx_p = mat->rowindices;
1853         if (imark > -1) {
1854           for (i=0; i<imark; i++) {
1855             idx_p[i] = cmap[cworkB[i]];
1856           }
1857         } else {
1858           for (i=0; i<nzB; i++) {
1859             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1860             else break;
1861           }
1862           imark = i;
1863         }
1864         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1865         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1866       }
1867     } else {
1868       if (idx) *idx = 0;
1869       if (v)   *v   = 0;
1870     }
1871   }
1872   *nz  = nztot;
1873   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1874   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1880 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1881 {
1882   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1883 
1884   PetscFunctionBegin;
1885   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1886   aij->getrowactive = PETSC_FALSE;
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatNorm_MPIAIJ"
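/*
   The supported norms, in the usual notation, are
     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )   (local sums of squares, then a global sum)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|        (global column sums, then the maximum)
     NORM_INFINITY:   ||A||_oo  = max_i sum_j |a_ij|        (local row sums and maxima, then a global max)
*/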
1892 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1893 {
1894   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1895   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1896   PetscErrorCode ierr;
1897   PetscInt       i,j,cstart = mat->cmap->rstart;
1898   PetscReal      sum = 0.0;
1899   MatScalar      *v;
1900 
1901   PetscFunctionBegin;
1902   if (aij->size == 1) {
1903     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1904   } else {
1905     if (type == NORM_FROBENIUS) {
1906       v = amat->a;
1907       for (i=0; i<amat->nz; i++) {
1908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1909       }
1910       v = bmat->a;
1911       for (i=0; i<bmat->nz; i++) {
1912         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1913       }
1914       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1915       *norm = PetscSqrtReal(*norm);
1916       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1917     } else if (type == NORM_1) { /* max column norm */
1918       PetscReal *tmp,*tmp2;
1919       PetscInt  *jj,*garray = aij->garray;
1920       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1921       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1922       *norm = 0.0;
1923       v     = amat->a; jj = amat->j;
1924       for (j=0; j<amat->nz; j++) {
1925         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1926       }
1927       v = bmat->a; jj = bmat->j;
1928       for (j=0; j<bmat->nz; j++) {
1929         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1930       }
1931       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1932       for (j=0; j<mat->cmap->N; j++) {
1933         if (tmp2[j] > *norm) *norm = tmp2[j];
1934       }
1935       ierr = PetscFree(tmp);CHKERRQ(ierr);
1936       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1937       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1938     } else if (type == NORM_INFINITY) { /* max row norm */
1939       PetscReal ntemp = 0.0;
1940       for (j=0; j<aij->A->rmap->n; j++) {
1941         v   = amat->a + amat->i[j];
1942         sum = 0.0;
1943         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1944           sum += PetscAbsScalar(*v); v++;
1945         }
1946         v = bmat->a + bmat->i[j];
1947         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         if (sum > ntemp) ntemp = sum;
1951       }
1952       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1953       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1954     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1955   }
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatTranspose_MPIAIJ"
1961 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1962 {
1963   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1964   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1965   PetscErrorCode ierr;
1966   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1967   PetscInt       cstart = A->cmap->rstart,ncol;
1968   Mat            B;
1969   MatScalar      *array;
1970 
1971   PetscFunctionBegin;
1972   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1973 
1974   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1975   ai = Aloc->i; aj = Aloc->j;
1976   bi = Bloc->i; bj = Bloc->j;
1977   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1978     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1979     PetscSFNode          *oloc;
1980     PETSC_UNUSED PetscSF sf;
1981 
1982     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1983     /* compute d_nnz for preallocation */
1984     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1985     for (i=0; i<ai[ma]; i++) {
1986       d_nnz[aj[i]]++;
1987       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1988     }
1989     /* compute local off-diagonal contributions */
1990     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1991     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1992     /* map those to global */
1993     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1994     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1995     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1996     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1997     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1998     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1999     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2000 
2001     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2002     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2003     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2004     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2005     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2006     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2007   } else {
2008     B    = *matout;
2009     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2010     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2011   }
2012 
2013   /* copy over the A part */
2014   array = Aloc->a;
2015   row   = A->rmap->rstart;
2016   for (i=0; i<ma; i++) {
2017     ncol = ai[i+1]-ai[i];
2018     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2019     row++;
2020     array += ncol; aj += ncol;
2021   }
2022   aj = Aloc->j;
2023   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2024 
2025   /* copy over the B part */
2026   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2027   array = Bloc->a;
2028   row   = A->rmap->rstart;
2029   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2030   cols_tmp = cols;
2031   for (i=0; i<mb; i++) {
2032     ncol = bi[i+1]-bi[i];
2033     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2034     row++;
2035     array += ncol; cols_tmp += ncol;
2036   }
2037   ierr = PetscFree(cols);CHKERRQ(ierr);
2038 
2039   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2040   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2041   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2042     *matout = B;
2043   } else {
2044     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2045   }
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 #undef __FUNCT__
2050 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
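/*
   Computes mat = diag(ll) * mat * diag(rr).  Left scaling only involves locally owned rows, so it is
   applied directly to both the diagonal block a and the off-diagonal block b.  Right scaling of b needs
   the ghost entries of rr, which are gathered into aij->lvec by a scatter that is started before the
   local work and completed afterwards, overlapping communication with computation.
*/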
2051 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2052 {
2053   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2054   Mat            a    = aij->A,b = aij->B;
2055   PetscErrorCode ierr;
2056   PetscInt       s1,s2,s3;
2057 
2058   PetscFunctionBegin;
2059   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2060   if (rr) {
2061     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2062     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2063     /* Overlap communication with computation. */
2064     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2065   }
2066   if (ll) {
2067     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2068     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2069     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2070   }
2071   /* scale  the diagonal block */
2072   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2073 
2074   if (rr) {
2075     /* Do a scatter end and then right scale the off-diagonal block */
2076     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2077     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2078   }
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2084 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2085 {
2086   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 #undef __FUNCT__
2095 #define __FUNCT__ "MatEqual_MPIAIJ"
2096 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2097 {
2098   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2099   Mat            a,b,c,d;
2100   PetscBool      flg;
2101   PetscErrorCode ierr;
2102 
2103   PetscFunctionBegin;
2104   a = matA->A; b = matA->B;
2105   c = matB->A; d = matB->B;
2106 
2107   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2108   if (flg) {
2109     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2110   }
2111   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatCopy_MPIAIJ"
2117 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2118 {
2119   PetscErrorCode ierr;
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2122 
2123   PetscFunctionBegin;
2124   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2125   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2126     /* Because of the column compression in the off-processor part of the matrix a->B,
2127        the number of columns in a->B and b->B may be different, hence we cannot call
2128        MatCopy() directly on the two parts. If need be, a copy more efficient than
2129        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2130        and then copying the submatrices */
2131     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2132   } else {
2133     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2134     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2135   }
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatSetUp_MPIAIJ"
2141 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2142 {
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /*
2151    Computes the number of nonzeros per row needed for preallocation when X and Y
2152    have different nonzero structure.
2153 */
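/*
   Equivalently, nnz[i] is the size of the union of the global column indices of row i of X and of row i
   of Y, assuming both rows are sorted by global column.  For example, if row i of X has global columns
   {0, 3, 7} and row i of Y has global columns {3, 5}, the merged pattern is {0, 3, 5, 7} and nnz[i] = 4.
*/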
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2156 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2157 {
2158   PetscInt       i,j,k,nzx,nzy;
2159 
2160   PetscFunctionBegin;
2161   /* Set the number of nonzeros in the new matrix */
2162   for (i=0; i<m; i++) {
2163     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2164     nzx = xi[i+1] - xi[i];
2165     nzy = yi[i+1] - yi[i];
2166     nnz[i] = 0;
2167     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2168       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2169       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2170       nnz[i]++;
2171     }
2172     for (; k<nzy; k++) nnz[i]++;
2173   }
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2180 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2181 {
2182   PetscErrorCode ierr;
2183   PetscInt       m = Y->rmap->N;
2184   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2185   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2186 
2187   PetscFunctionBegin;
2188   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatAXPY_MPIAIJ"
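/*
   Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the BLAS axpy is applied directly to the stored value
   arrays of the diagonal and off-diagonal blocks; with SUBSET_NONZERO_PATTERN the generic MatAXPY_Basic()
   is used; otherwise a new matrix is preallocated from the per-block merged nonzero counts (see
   MatAXPYGetPreallocation_MPIAIJ() above) and Y is replaced by it via MatHeaderReplace().
*/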
2194 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2195 {
2196   PetscErrorCode ierr;
2197   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2198   PetscBLASInt   bnz,one=1;
2199   Mat_SeqAIJ     *x,*y;
2200 
2201   PetscFunctionBegin;
2202   if (str == SAME_NONZERO_PATTERN) {
2203     PetscScalar alpha = a;
2204     x    = (Mat_SeqAIJ*)xx->A->data;
2205     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2206     y    = (Mat_SeqAIJ*)yy->A->data;
2207     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2208     x    = (Mat_SeqAIJ*)xx->B->data;
2209     y    = (Mat_SeqAIJ*)yy->B->data;
2210     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2211     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2212     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2213   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2214     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2215   } else {
2216     Mat      B;
2217     PetscInt *nnz_d,*nnz_o;
2218     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2219     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2220     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2221     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2222     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2223     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2224     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2225     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2226     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2227     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2228     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2229     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2230     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2231     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatConjugate_MPIAIJ"
2240 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2241 {
2242 #if defined(PETSC_USE_COMPLEX)
2243   PetscErrorCode ierr;
2244   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2245 
2246   PetscFunctionBegin;
2247   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2248   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2249 #else
2250   PetscFunctionBegin;
2251 #endif
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatRealPart_MPIAIJ"
2257 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2258 {
2259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2264   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2270 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2271 {
2272   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2273   PetscErrorCode ierr;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2277   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2283 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2284 {
2285   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287   PetscInt       i,*idxb = 0;
2288   PetscScalar    *va,*vb;
2289   Vec            vtmp;
2290 
2291   PetscFunctionBegin;
2292   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2293   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2294   if (idx) {
2295     for (i=0; i<A->rmap->n; i++) {
2296       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2297     }
2298   }
2299 
2300   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2301   if (idx) {
2302     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2303   }
2304   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2305   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2306 
2307   for (i=0; i<A->rmap->n; i++) {
2308     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2309       va[i] = vb[i];
2310       if (idx) idx[i] = a->garray[idxb[i]];
2311     }
2312   }
2313 
2314   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2315   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2316   ierr = PetscFree(idxb);CHKERRQ(ierr);
2317   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2323 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2324 {
2325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327   PetscInt       i,*idxb = 0;
2328   PetscScalar    *va,*vb;
2329   Vec            vtmp;
2330 
2331   PetscFunctionBegin;
2332   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2333   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2334   if (idx) {
2335     for (i=0; i<A->rmap->n; i++) {
2336       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2337     }
2338   }
2339 
2340   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2341   if (idx) {
2342     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2343   }
2344   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2345   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2346 
2347   for (i=0; i<A->rmap->n; i++) {
2348     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2349       va[i] = vb[i];
2350       if (idx) idx[i] = a->garray[idxb[i]];
2351     }
2352   }
2353 
2354   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2355   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2356   ierr = PetscFree(idxb);CHKERRQ(ierr);
2357   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2363 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2364 {
2365   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2366   PetscInt       n      = A->rmap->n;
2367   PetscInt       cstart = A->cmap->rstart;
2368   PetscInt       *cmap  = mat->garray;
2369   PetscInt       *diagIdx, *offdiagIdx;
2370   Vec            diagV, offdiagV;
2371   PetscScalar    *a, *diagA, *offdiagA;
2372   PetscInt       r;
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2377   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2378   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2379   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2380   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2381   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2382   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2383   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2384   for (r = 0; r < n; ++r) {
2385     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2386       a[r]   = diagA[r];
2387       idx[r] = cstart + diagIdx[r];
2388     } else {
2389       a[r]   = offdiagA[r];
2390       idx[r] = cmap[offdiagIdx[r]];
2391     }
2392   }
2393   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2394   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2395   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2397   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2398   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2404 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2405 {
2406   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2407   PetscInt       n      = A->rmap->n;
2408   PetscInt       cstart = A->cmap->rstart;
2409   PetscInt       *cmap  = mat->garray;
2410   PetscInt       *diagIdx, *offdiagIdx;
2411   Vec            diagV, offdiagV;
2412   PetscScalar    *a, *diagA, *offdiagA;
2413   PetscInt       r;
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2418   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2419   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2420   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2421   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2422   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2423   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2424   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2425   for (r = 0; r < n; ++r) {
2426     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2427       a[r]   = diagA[r];
2428       idx[r] = cstart + diagIdx[r];
2429     } else {
2430       a[r]   = offdiagA[r];
2431       idx[r] = cmap[offdiagIdx[r]];
2432     }
2433   }
2434   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2436   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2437   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2438   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2439   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2445 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2446 {
2447   PetscErrorCode ierr;
2448   Mat            *dummy;
2449 
2450   PetscFunctionBegin;
2451   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2452   *newmat = *dummy;
2453   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2459 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2460 {
2461   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2462   PetscErrorCode ierr;
2463 
2464   PetscFunctionBegin;
2465   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2466   A->factorerrortype = a->A->factorerrortype;
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2472 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2473 {
2474   PetscErrorCode ierr;
2475   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2476 
2477   PetscFunctionBegin;
2478   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2479   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2480   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2481   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2487 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2488 {
2489   PetscFunctionBegin;
2490   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2491   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 #undef __FUNCT__
2496 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2497 /*@
2498    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2499 
2500    Collective on Mat
2501 
2502    Input Parameters:
2503 +    A - the matrix
2504 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is not to use it)
2505 
2506    Level: advanced
2507 
2508 @*/
2509 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2510 {
2511   PetscErrorCode       ierr;
2512 
2513   PetscFunctionBegin;
2514   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
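
/*
   A minimal usage sketch: request the scalable overlap algorithm before the overlap is computed
   (for example by the setup of an overlapping additive Schwarz preconditioner), either from the
   options database with -mat_increase_overlap_scalable or programmatically; the matrix A below is
   assumed to be of type MATMPIAIJ.

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
*/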
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2520 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2521 {
2522   PetscErrorCode       ierr;
2523   PetscBool            sc = PETSC_FALSE,flg;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2527   ierr = PetscObjectOptionsBegin((PetscObject)A);
2528     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2529     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2530     if (flg) {
2531       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2532     }
2533   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatShift_MPIAIJ"
2539 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2540 {
2541   PetscErrorCode ierr;
2542   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2543   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2544 
2545   PetscFunctionBegin;
2546   if (!Y->preallocated) {
2547     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2548   } else if (!aij->nz) {
2549     PetscInt nonew = aij->nonew;
2550     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2551     aij->nonew = nonew;
2552   }
2553   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2559 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2560 {
2561   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2562   PetscErrorCode ierr;
2563 
2564   PetscFunctionBegin;
2565   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2566   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2567   if (d) {
2568     PetscInt rstart;
2569     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2570     *d += rstart;
2571 
2572   }
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 
2577 /* -------------------------------------------------------------------*/
2578 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2579                                        MatGetRow_MPIAIJ,
2580                                        MatRestoreRow_MPIAIJ,
2581                                        MatMult_MPIAIJ,
2582                                 /* 4*/ MatMultAdd_MPIAIJ,
2583                                        MatMultTranspose_MPIAIJ,
2584                                        MatMultTransposeAdd_MPIAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                 /*10*/ 0,
2589                                        0,
2590                                        0,
2591                                        MatSOR_MPIAIJ,
2592                                        MatTranspose_MPIAIJ,
2593                                 /*15*/ MatGetInfo_MPIAIJ,
2594                                        MatEqual_MPIAIJ,
2595                                        MatGetDiagonal_MPIAIJ,
2596                                        MatDiagonalScale_MPIAIJ,
2597                                        MatNorm_MPIAIJ,
2598                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2599                                        MatAssemblyEnd_MPIAIJ,
2600                                        MatSetOption_MPIAIJ,
2601                                        MatZeroEntries_MPIAIJ,
2602                                 /*24*/ MatZeroRows_MPIAIJ,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                 /*29*/ MatSetUp_MPIAIJ,
2608                                        0,
2609                                        0,
2610                                        MatGetDiagonalBlock_MPIAIJ,
2611                                        0,
2612                                 /*34*/ MatDuplicate_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*39*/ MatAXPY_MPIAIJ,
2618                                        MatGetSubMatrices_MPIAIJ,
2619                                        MatIncreaseOverlap_MPIAIJ,
2620                                        MatGetValues_MPIAIJ,
2621                                        MatCopy_MPIAIJ,
2622                                 /*44*/ MatGetRowMax_MPIAIJ,
2623                                        MatScale_MPIAIJ,
2624                                        MatShift_MPIAIJ,
2625                                        MatDiagonalSet_MPIAIJ,
2626                                        MatZeroRowsColumns_MPIAIJ,
2627                                 /*49*/ MatSetRandom_MPIAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2633                                        0,
2634                                        MatSetUnfactored_MPIAIJ,
2635                                        MatPermute_MPIAIJ,
2636                                        0,
2637                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2638                                        MatDestroy_MPIAIJ,
2639                                        MatView_MPIAIJ,
2640                                        0,
2641                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2642                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2643                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2648                                        MatGetRowMinAbs_MPIAIJ,
2649                                        0,
2650                                        MatSetLateBlockSizes_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                 /*75*/ MatFDColoringApply_AIJ,
2654                                        MatSetFromOptions_MPIAIJ,
2655                                        0,
2656                                        0,
2657                                        MatFindZeroDiagonals_MPIAIJ,
2658                                 /*80*/ 0,
2659                                        0,
2660                                        0,
2661                                 /*83*/ MatLoad_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2668                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2669                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2670                                        MatPtAP_MPIAIJ_MPIAIJ,
2671                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2672                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                 /*99*/ 0,
2678                                        0,
2679                                        0,
2680                                        MatConjugate_MPIAIJ,
2681                                        0,
2682                                 /*104*/MatSetValuesRow_MPIAIJ,
2683                                        MatRealPart_MPIAIJ,
2684                                        MatImaginaryPart_MPIAIJ,
2685                                        0,
2686                                        0,
2687                                 /*109*/0,
2688                                        0,
2689                                        MatGetRowMin_MPIAIJ,
2690                                        0,
2691                                        MatMissingDiagonal_MPIAIJ,
2692                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2693                                        0,
2694                                        MatGetGhosts_MPIAIJ,
2695                                        0,
2696                                        0,
2697                                 /*119*/0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        MatGetMultiProcBlock_MPIAIJ,
2702                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2703                                        MatGetColumnNorms_MPIAIJ,
2704                                        MatInvertBlockDiagonal_MPIAIJ,
2705                                        0,
2706                                        MatGetSubMatricesMPI_MPIAIJ,
2707                                 /*129*/0,
2708                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2709                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2710                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2711                                        0,
2712                                 /*134*/0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                 /*139*/0,
2718                                        0,
2719                                        0,
2720                                        MatFDColoringSetUp_MPIXAIJ,
2721                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2722                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2723 };
2724 
2725 /* ----------------------------------------------------------------------------------------*/
2726 
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2729 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2742 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2743 {
2744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2749   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2755 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2756 {
2757   Mat_MPIAIJ     *b;
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2762   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2763   b = (Mat_MPIAIJ*)B->data;
2764 
2765   if (!B->preallocated) {
2766     /* Explicitly create 2 MATSEQAIJ matrices. */
2767     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2768     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2769     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2770     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2771     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2772     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2773     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2774     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2775     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2776     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2777   }
2778 
2779   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2780   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2781   B->preallocated = PETSC_TRUE;
2782   PetscFunctionReturn(0);
2783 }
2784 
2785 #undef __FUNCT__
2786 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2787 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2788 {
2789   Mat            mat;
2790   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2791   PetscErrorCode ierr;
2792 
2793   PetscFunctionBegin;
2794   *newmat = 0;
2795   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2796   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2797   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2798   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2799   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2800   a       = (Mat_MPIAIJ*)mat->data;
2801 
2802   mat->factortype   = matin->factortype;
2803   mat->assembled    = PETSC_TRUE;
2804   mat->insertmode   = NOT_SET_VALUES;
2805   mat->preallocated = PETSC_TRUE;
2806 
2807   a->size         = oldmat->size;
2808   a->rank         = oldmat->rank;
2809   a->donotstash   = oldmat->donotstash;
2810   a->roworiented  = oldmat->roworiented;
2811   a->rowindices   = 0;
2812   a->rowvalues    = 0;
2813   a->getrowactive = PETSC_FALSE;
2814 
2815   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2817 
2818   if (oldmat->colmap) {
2819 #if defined(PETSC_USE_CTABLE)
2820     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2821 #else
2822     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2823     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2824     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2825 #endif
2826   } else a->colmap = 0;
2827   if (oldmat->garray) {
2828     PetscInt len;
2829     len  = oldmat->B->cmap->n;
2830     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2831     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2832     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2833   } else a->garray = 0;
2834 
2835   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2836   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2837   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2838   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2839   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2840   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2841   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2842   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2843   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2844   *newmat = mat;
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 
2849 
2850 #undef __FUNCT__
2851 #define __FUNCT__ "MatLoad_MPIAIJ"
2852 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2853 {
2854   PetscScalar    *vals,*svals;
2855   MPI_Comm       comm;
2856   PetscErrorCode ierr;
2857   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2858   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2859   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2860   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2861   PetscInt       cend,cstart,n,*rowners;
2862   int            fd;
2863   PetscInt       bs = newMat->rmap->bs;
2864 
2865   PetscFunctionBegin;
2866   /* force binary viewer to load .info file if it has not yet done so */
2867   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2868   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2869   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2870   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2871   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2872   if (!rank) {
2873     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2874     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2875     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2876   }
2877 
2878   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2879   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2880   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2881   if (bs < 0) bs = 1;
2882 
2883   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2884   M    = header[1]; N = header[2];
2885 
2886   /* If global sizes are set, check if they are consistent with that given in the file */
2887   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2888   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2889 
2890   /* determine ownership of all (block) rows */
2891   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2892   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2893   else m = newMat->rmap->n; /* Set by user */
2894 
2895   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2896   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2897 
2898   /* First process needs enough room for process with most rows */
2899   if (!rank) {
2900     mmax = rowners[1];
2901     for (i=2; i<=size; i++) {
2902       mmax = PetscMax(mmax, rowners[i]);
2903     }
2904   } else mmax = -1;             /* unused, but compilers complain */
2905 
2906   rowners[0] = 0;
2907   for (i=2; i<=size; i++) {
2908     rowners[i] += rowners[i-1];
2909   }
2910   rstart = rowners[rank];
2911   rend   = rowners[rank+1];
2912 
2913   /* distribute row lengths to all processors */
2914   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2915   if (!rank) {
2916     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2917     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2918     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2919     for (j=0; j<m; j++) {
2920       procsnz[0] += ourlens[j];
2921     }
2922     for (i=1; i<size; i++) {
2923       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2924       /* calculate the number of nonzeros on each processor */
2925       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2926         procsnz[i] += rowlengths[j];
2927       }
2928       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2929     }
2930     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2931   } else {
2932     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2933   }
2934 
2935   if (!rank) {
2936     /* determine max buffer needed and allocate it */
2937     maxnz = 0;
2938     for (i=0; i<size; i++) {
2939       maxnz = PetscMax(maxnz,procsnz[i]);
2940     }
2941     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2942 
2943     /* read in my part of the matrix column indices  */
2944     nz   = procsnz[0];
2945     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2946     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2947 
2948     /* read in everyone else's part and ship it off */
2949     for (i=1; i<size; i++) {
2950       nz   = procsnz[i];
2951       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2952       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2953     }
2954     ierr = PetscFree(cols);CHKERRQ(ierr);
2955   } else {
2956     /* determine buffer space needed for message */
2957     nz = 0;
2958     for (i=0; i<m; i++) {
2959       nz += ourlens[i];
2960     }
2961     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2962 
2963     /* receive message of column indices*/
2964     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2965   }
2966 
2967   /* determine column ownership if matrix is not square */
2968   if (N != M) {
2969     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2970     else n = newMat->cmap->n;
2971     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2972     cstart = cend - n;
2973   } else {
2974     cstart = rstart;
2975     cend   = rend;
2976     n      = cend - cstart;
2977   }
2978 
2979   /* loop over local rows, determining number of off diagonal entries */
2980   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2981   jj   = 0;
2982   for (i=0; i<m; i++) {
2983     for (j=0; j<ourlens[i]; j++) {
2984       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2985       jj++;
2986     }
2987   }
2988 
2989   for (i=0; i<m; i++) {
2990     ourlens[i] -= offlens[i];
2991   }
2992   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2993 
2994   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2995 
2996   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2997 
2998   for (i=0; i<m; i++) {
2999     ourlens[i] += offlens[i];
3000   }
3001 
3002   if (!rank) {
3003     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3004 
3005     /* read in my part of the matrix numerical values  */
3006     nz   = procsnz[0];
3007     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3008 
3009     /* insert into matrix */
3010     jj      = rstart;
3011     smycols = mycols;
3012     svals   = vals;
3013     for (i=0; i<m; i++) {
3014       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3015       smycols += ourlens[i];
3016       svals   += ourlens[i];
3017       jj++;
3018     }
3019 
3020     /* read in other processors and ship out */
3021     for (i=1; i<size; i++) {
3022       nz   = procsnz[i];
3023       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3024       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3025     }
3026     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3027   } else {
3028     /* receive numeric values */
3029     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3030 
3031     /* receive message of values*/
3032     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3033 
3034     /* insert into matrix */
3035     jj      = rstart;
3036     smycols = mycols;
3037     svals   = vals;
3038     for (i=0; i<m; i++) {
3039       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3040       smycols += ourlens[i];
3041       svals   += ourlens[i];
3042       jj++;
3043     }
3044   }
3045   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3046   ierr = PetscFree(vals);CHKERRQ(ierr);
3047   ierr = PetscFree(mycols);CHKERRQ(ierr);
3048   ierr = PetscFree(rowners);CHKERRQ(ierr);
3049   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3050   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3056 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3057 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3058 {
3059   PetscErrorCode ierr;
3060   IS             iscol_local;
3061   PetscInt       csize;
3062 
3063   PetscFunctionBegin;
3064   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3065   if (call == MAT_REUSE_MATRIX) {
3066     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3067     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3068   } else {
3069     /* check if we are grabbing all columns*/
3070     PetscBool    isstride;
3071     PetscMPIInt  lisstride = 0,gisstride;
3072     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3073     if (isstride) {
3074       PetscInt  start,len,mstart,mlen;
3075       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3076       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3077       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3078       if (mstart == start && mlen-mstart == len) lisstride = 1;
3079     }
3080     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3081     if (gisstride) {
3082       PetscInt N;
3083       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3084       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3085       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3086       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3087     } else {
3088       PetscInt cbs;
3089       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3090       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3091       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3092     }
3093   }
3094   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3095   if (call == MAT_INITIAL_MATRIX) {
3096     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3097     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3098   }
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3103 #undef __FUNCT__
3104 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3105 /*
3106     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3107   and then the final result obtained by concatenating the local matrices.
3108   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3109 
3110   Note: This requires a sequential iscol with all indices.
3111 */
3112 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3113 {
3114   PetscErrorCode ierr;
3115   PetscMPIInt    rank,size;
3116   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3117   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3118   PetscBool      allcolumns, colflag;
3119   Mat            M,Mreuse;
3120   MatScalar      *vwork,*aa;
3121   MPI_Comm       comm;
3122   Mat_SeqAIJ     *aij;
3123 
3124   PetscFunctionBegin;
3125   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3126   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3127   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3128 
3129   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3130   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3131   if (colflag && ncol == mat->cmap->N) {
3132     allcolumns = PETSC_TRUE;
3133     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3134   } else {
3135     allcolumns = PETSC_FALSE;
3136   }
3137   if (call ==  MAT_REUSE_MATRIX) {
3138     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3139     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3140     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3141   } else {
3142     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3143   }
3144 
3145   /*
3146       m - number of local rows
3147       n - number of columns (same on all processors)
3148       rstart - first row in new global matrix generated
3149   */
3150   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3151   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3152   if (call == MAT_INITIAL_MATRIX) {
3153     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3154     ii  = aij->i;
3155     jj  = aij->j;
3156 
3157     /*
3158         Determine the number of non-zeros in the diagonal and off-diagonal
3159         portions of the matrix in order to do correct preallocation
3160     */
3161 
3162     /* first get start and end of "diagonal" columns */
3163     if (csize == PETSC_DECIDE) {
3164       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3165       if (mglobal == n) { /* square matrix */
3166         nlocal = m;
3167       } else {
3168         nlocal = n/size + ((n % size) > rank);
3169       }
3170     } else {
3171       nlocal = csize;
3172     }
3173     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3174     rstart = rend - nlocal;
3175     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3176 
3177     /* next, compute all the lengths */
3178     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3179     olens = dlens + m;
3180     for (i=0; i<m; i++) {
3181       jend = ii[i+1] - ii[i];
3182       olen = 0;
3183       dlen = 0;
3184       for (j=0; j<jend; j++) {
3185         if (*jj < rstart || *jj >= rend) olen++;
3186         else dlen++;
3187         jj++;
3188       }
3189       olens[i] = olen;
3190       dlens[i] = dlen;
3191     }
3192     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3193     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3194     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3195     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3196     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3197     ierr = PetscFree(dlens);CHKERRQ(ierr);
3198   } else {
3199     PetscInt ml,nl;
3200 
3201     M    = *newmat;
3202     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3203     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3204     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3205     /*
3206          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3207        rather than the slower MatSetValues().
3208     */
3209     M->was_assembled = PETSC_TRUE;
3210     M->assembled     = PETSC_FALSE;
3211   }
3212   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3213   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3214   ii   = aij->i;
3215   jj   = aij->j;
3216   aa   = aij->a;
3217   for (i=0; i<m; i++) {
3218     row   = rstart + i;
3219     nz    = ii[i+1] - ii[i];
3220     cwork = jj;     jj += nz;
3221     vwork = aa;     aa += nz;
3222     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3223   }
3224 
3225   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3226   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3227   *newmat = M;
3228 
3229   /* save submatrix used in processor for next request */
3230   if (call ==  MAT_INITIAL_MATRIX) {
3231     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3232     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3233   }
3234   PetscFunctionReturn(0);
3235 }
3236 
3237 #undef __FUNCT__
3238 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3239 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3240 {
3241   PetscInt       m,cstart, cend,j,nnz,i,d;
3242   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3243   const PetscInt *JJ;
3244   PetscScalar    *values;
3245   PetscErrorCode ierr;
3246 
3247   PetscFunctionBegin;
3248   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3249 
3250   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3251   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3252   m      = B->rmap->n;
3253   cstart = B->cmap->rstart;
3254   cend   = B->cmap->rend;
3255   rstart = B->rmap->rstart;
3256 
3257   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3258 
3259 #if defined(PETSC_USE_DEBUG)
3260   for (i=0; i<m; i++) {
3261     nnz = Ii[i+1]- Ii[i];
3262     JJ  = J + Ii[i];
3263     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3264     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3265     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3266   }
3267 #endif
3268 
3269   for (i=0; i<m; i++) {
3270     nnz     = Ii[i+1]- Ii[i];
3271     JJ      = J + Ii[i];
3272     nnz_max = PetscMax(nnz_max,nnz);
3273     d       = 0;
3274     for (j=0; j<nnz; j++) {
3275       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3276     }
3277     d_nnz[i] = d;
3278     o_nnz[i] = nnz - d;
3279   }
3280   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3281   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3282 
3283   if (v) values = (PetscScalar*)v;
3284   else {
3285     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3286   }
3287 
3288   for (i=0; i<m; i++) {
3289     ii   = i + rstart;
3290     nnz  = Ii[i+1]- Ii[i];
3291     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3292   }
3293   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3294   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3295 
3296   if (!v) {
3297     ierr = PetscFree(values);CHKERRQ(ierr);
3298   }
3299   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3300   PetscFunctionReturn(0);
3301 }
3302 
3303 #undef __FUNCT__
3304 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3305 /*@
3306    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3307    (the default parallel PETSc format).
3308 
3309    Collective on MPI_Comm
3310 
3311    Input Parameters:
3312 +  B - the matrix
3313 .  i - the indices into j for the start of each local row (starts with zero)
3314 .  j - the column indices for each local row (starts with zero)
3315 -  v - optional values in the matrix
3316 
3317    Level: developer
3318 
3319    Notes:
3320        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3321      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3322      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3323 
3324        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3325 
3326        The format used for the sparse matrix input is equivalent to a
3327     row-major ordering, i.e. for the following matrix, the expected input data is
3328     as shown below:
3329 
3330 $        1 0 0
3331 $        2 0 3     P0
3332 $       -------
3333 $        4 5 6     P1
3334 $
3335 $     Process0 [P0]: rows_owned=[0,1]
3336 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3337 $        j =  {0,0,2}  [size = 3]
3338 $        v =  {1,2,3}  [size = 3]
3339 $
3340 $     Process1 [P1]: rows_owned=[2]
3341 $        i =  {0,3}    [size = nrow+1  = 1+1]
3342 $        j =  {0,1,2}  [size = 3]
3343 $        v =  {4,5,6}  [size = 3]
3344 
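   A minimal sketch of the call on process 0 of the example above (assuming B has already
   been created with MatCreate(), MatSetSizes(), and MatSetType() with type MATMPIAIJ):

$     PetscInt    i[] = {0,1,3};
$     PetscInt    j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
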
3345 .keywords: matrix, aij, compressed row, sparse, parallel
3346 
3347 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3348           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3349 @*/
3350 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3351 {
3352   PetscErrorCode ierr;
3353 
3354   PetscFunctionBegin;
3355   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3356   PetscFunctionReturn(0);
3357 }
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3361 /*@C
3362    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3363    (the default parallel PETSc format).  For good matrix assembly performance
3364    the user should preallocate the matrix storage by setting the parameters
3365    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3366    performance can be increased by more than a factor of 50.
3367 
3368    Collective on MPI_Comm
3369 
3370    Input Parameters:
3371 +  B - the matrix
3372 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3373            (same value is used for all local rows)
3374 .  d_nnz - array containing the number of nonzeros in the various rows of the
3375            DIAGONAL portion of the local submatrix (possibly different for each row)
3376            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3377            The size of this array is equal to the number of local rows, i.e 'm'.
3378            For matrices that will be factored, you must leave room for (and set)
3379            the diagonal entry even if it is zero.
3380 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3381            submatrix (same value is used for all local rows).
3382 -  o_nnz - array containing the number of nonzeros in the various rows of the
3383            OFF-DIAGONAL portion of the local submatrix (possibly different for
3384            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3385            structure. The size of this array is equal to the number
3386            of local rows, i.e 'm'.
3387 
3388    If the *_nnz parameter is given then the *_nz parameter is ignored
3389 
3390    The AIJ format (also called the Yale sparse matrix format or
3391    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3392    storage.  The stored row and column indices begin with zero.
3393    See Users-Manual: ch_mat for details.
3394 
3395    The parallel matrix is partitioned such that the first m0 rows belong to
3396    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3397    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3398 
3399    The DIAGONAL portion of the local submatrix of a processor can be defined
3400    as the submatrix which is obtained by extracting the part corresponding to
3401    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3402    first row that belongs to the processor, r2 is the last row belonging to
3403    this processor, and c1-c2 is the range of indices of the local part of a
3404    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3405    common case of a square matrix, the row and column ranges are the same and
3406    the DIAGONAL part is also square. The remaining portion of the local
3407    submatrix (of size m x (N-n)) constitutes the OFF-DIAGONAL portion.
3408 
3409    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3410 
3411    You can call MatGetInfo() to get information on how effective the preallocation was;
3412    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded;
3413    You can also run with the option -info and look for messages with the string
3414    malloc in them to see if additional memory allocation was needed.
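
   For instance, a minimal sketch of checking the preallocation quality after assembly
   (A is the assembled matrix; the printed fields are local to each process):

$     MatInfo info;
$     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
$     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_unneeded %g\n",info.mallocs,info.nz_unneeded);CHKERRQ(ierr);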
3415 
3416    Example usage:
3417 
3418    Consider the following 8x8 matrix with 34 non-zero values, that is
3419    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3420    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3421    as follows:
3422 
3423 .vb
3424             1  2  0  |  0  3  0  |  0  4
3425     Proc0   0  5  6  |  7  0  0  |  8  0
3426             9  0 10  | 11  0  0  | 12  0
3427     -------------------------------------
3428            13  0 14  | 15 16 17  |  0  0
3429     Proc1   0 18  0  | 19 20 21  |  0  0
3430             0  0  0  | 22 23  0  | 24  0
3431     -------------------------------------
3432     Proc2  25 26 27  |  0  0 28  | 29  0
3433            30  0  0  | 31 32 33  |  0 34
3434 .ve
3435 
3436    This can be represented as a collection of submatrices as:
3437 
3438 .vb
3439       A B C
3440       D E F
3441       G H I
3442 .ve
3443 
3444    Where the submatrices A,B,C are owned by proc0, D,E,F are
3445    owned by proc1, G,H,I are owned by proc2.
3446 
3447    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3448    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3449    The 'M','N' parameters are 8,8, and have the same values on all procs.
3450 
3451    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3452    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3453    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3454    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3455    part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
3456    matrix and [DF] as another SeqAIJ matrix.
3457 
3458    When d_nz, o_nz parameters are specified, d_nz storage elements are
3459    allocated for every row of the local diagonal submatrix, and o_nz
3460    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3461    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3462    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3463    In this case, the values of d_nz,o_nz are:
3464 .vb
3465      proc0 : dnz = 2, o_nz = 2
3466      proc1 : dnz = 3, o_nz = 2
3467      proc2 : dnz = 1, o_nz = 4
3468 .ve
3469    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3470    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3471    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3472    34 values.
3473 
3474    When d_nnz, o_nnz parameters are specified, the storage is specified
3475    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3476    In the above case the values for d_nnz,o_nnz are:
3477 .vb
3478      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3479      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3480      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3481 .ve
3482    Here the space allocated is the sum of all the above values, i.e. 34, and
3483    hence the preallocation is perfect.
3484 
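   For proc0 of the example above, a minimal sketch of the corresponding call is
   (A is assumed to already be a MATMPIAIJ matrix of the correct sizes):

$     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
$     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
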
3485    Level: intermediate
3486 
3487 .keywords: matrix, aij, compressed row, sparse, parallel
3488 
3489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3490           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3491 @*/
3492 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3493 {
3494   PetscErrorCode ierr;
3495 
3496   PetscFunctionBegin;
3497   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3498   PetscValidType(B,1);
3499   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3505 /*@
3506      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3507          rows in standard CSR format.
3508 
3509    Collective on MPI_Comm
3510 
3511    Input Parameters:
3512 +  comm - MPI communicator
3513 .  m - number of local rows (Cannot be PETSC_DECIDE)
3514 .  n - This value should be the same as the local size used in creating the
3515        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3516        calculated if N is given) For square matrices n is almost always m.
3517 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3518 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3519 .   i - row indices
3520 .   j - column indices
3521 -   a - matrix values
3522 
3523    Output Parameter:
3524 .   mat - the matrix
3525 
3526    Level: intermediate
3527 
3528    Notes:
3529        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3530      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3531      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3532 
3533        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3534 
3535        The format used for the sparse matrix input is equivalent to a
3536     row-major ordering, i.e. for the following matrix, the expected input data is
3537     as shown below:
3538 
3539 $        1 0 0
3540 $        2 0 3     P0
3541 $       -------
3542 $        4 5 6     P1
3543 $
3544 $     Process0 [P0]: rows_owned=[0,1]
3545 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3546 $        j =  {0,0,2}  [size = 3]
3547 $        v =  {1,2,3}  [size = 3]
3548 $
3549 $     Process1 [P1]: rows_owned=[2]
3550 $        i =  {0,3}    [size = nrow+1  = 1+1]
3551 $        j =  {0,1,2}  [size = 3]
3552 $        v =  {4,5,6}  [size = 3]
3553 
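   A minimal sketch for process 0 of the example above (each process passes only its own
   local rows; the communicator and variable names here are illustrative):

$     PetscInt    i[] = {0,1,3};
$     PetscInt    j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$     Mat         A;
$     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
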
3554 .keywords: matrix, aij, compressed row, sparse, parallel
3555 
3556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3557           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3558 @*/
3559 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3560 {
3561   PetscErrorCode ierr;
3562 
3563   PetscFunctionBegin;
3564   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3565   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3566   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3567   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3568   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3569   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3570   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3571   PetscFunctionReturn(0);
3572 }
3573 
3574 #undef __FUNCT__
3575 #define __FUNCT__ "MatCreateAIJ"
3576 /*@C
3577    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3578    (the default parallel PETSc format).  For good matrix assembly performance
3579    the user should preallocate the matrix storage by setting the parameters
3580    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3581    performance can be increased by more than a factor of 50.
3582 
3583    Collective on MPI_Comm
3584 
3585    Input Parameters:
3586 +  comm - MPI communicator
3587 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3588            This value should be the same as the local size used in creating the
3589            y vector for the matrix-vector product y = Ax.
3590 .  n - This value should be the same as the local size used in creating the
3591        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3592        calculated if N is given) For square matrices n is almost always m.
3593 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3594 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3595 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3596            (same value is used for all local rows)
3597 .  d_nnz - array containing the number of nonzeros in the various rows of the
3598            DIAGONAL portion of the local submatrix (possibly different for each row)
3599            or NULL, if d_nz is used to specify the nonzero structure.
3600            The size of this array is equal to the number of local rows, i.e 'm'.
3601 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3602            submatrix (same value is used for all local rows).
3603 -  o_nnz - array containing the number of nonzeros in the various rows of the
3604            OFF-DIAGONAL portion of the local submatrix (possibly different for
3605            each row) or NULL, if o_nz is used to specify the nonzero
3606            structure. The size of this array is equal to the number
3607            of local rows, i.e 'm'.
3608 
3609    Output Parameter:
3610 .  A - the matrix
3611 
3612    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3613    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3614    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3615 
3616    Notes:
3617    If the *_nnz parameter is given then the *_nz parameter is ignored
3618 
3619    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3620    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3621    storage requirements for this matrix.
3622 
3623    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3624    processor then it must be used on all processors that share the object for
3625    that argument.
3626 
3627    The user MUST specify either the local or global matrix dimensions
3628    (possibly both).
3629 
3630    The parallel matrix is partitioned across processors such that the
3631    first m0 rows belong to process 0, the next m1 rows belong to
3632    process 1, the next m2 rows belong to process 2 etc.. where
3633    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3634    values corresponding to [m x N] submatrix.
3635 
3636    The columns are logically partitioned with the n0 columns belonging
3637    to 0th partition, the next n1 columns belonging to the next
3638    partition etc., where n0,n1,n2,... are the input parameter 'n'.
3639 
3640    The DIAGONAL portion of the local submatrix on any given processor
3641    is the submatrix corresponding to the rows and columns m,n
3642    owned by the given processor, i.e. the diagonal matrix on
3643    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3644    etc. The remaining portion of the local submatrix [m x (N-n)]
3645    constitutes the OFF-DIAGONAL portion. The example below better
3646    illustrates this concept.
3647 
3648    For a square global matrix we define each processor's diagonal portion
3649    to be its local rows and the corresponding columns (a square submatrix);
3650    each processor's off-diagonal portion encompasses the remainder of the
3651    local matrix (a rectangular submatrix).
3652 
3653    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3654 
3655    When calling this routine with a single process communicator, a matrix of
3656    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3657    type of communicator, use the construction mechanism:
3658      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3659 
3660    By default, this format uses inodes (identical nodes) when possible.
3661    We search for consecutive rows with the same nonzero structure, thereby
3662    reusing matrix information to achieve increased efficiency.
3663 
3664    Options Database Keys:
3665 +  -mat_no_inode  - Do not use inodes
3666 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3667 -  -mat_aij_oneindex - Internally use indexing starting at 1
3668         rather than 0.  Note that when calling MatSetValues(),
3669         the user still MUST index entries starting at 0!
3670 
3671 
3672    Example usage:
3673 
3674    Consider the following 8x8 matrix with 34 non-zero values, that is
3675    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3676    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3677    as follows:
3678 
3679 .vb
3680             1  2  0  |  0  3  0  |  0  4
3681     Proc0   0  5  6  |  7  0  0  |  8  0
3682             9  0 10  | 11  0  0  | 12  0
3683     -------------------------------------
3684            13  0 14  | 15 16 17  |  0  0
3685     Proc1   0 18  0  | 19 20 21  |  0  0
3686             0  0  0  | 22 23  0  | 24  0
3687     -------------------------------------
3688     Proc2  25 26 27  |  0  0 28  | 29  0
3689            30  0  0  | 31 32 33  |  0 34
3690 .ve
3691 
3692    This can be represented as a collection of submatrices as:
3693 
3694 .vb
3695       A B C
3696       D E F
3697       G H I
3698 .ve
3699 
3700    Where the submatrices A,B,C are owned by proc0, D,E,F are
3701    owned by proc1, G,H,I are owned by proc2.
3702 
3703    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3704    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3705    The 'M','N' parameters are 8,8, and have the same values on all procs.
3706 
3707    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3708    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3709    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3710    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3711    part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
3712    matrix and [DF] as another SeqAIJ matrix.
3713 
3714    When d_nz, o_nz parameters are specified, d_nz storage elements are
3715    allocated for every row of the local diagonal submatrix, and o_nz
3716    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3717    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3718    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3719    In this case, the values of d_nz,o_nz are:
3720 .vb
3721      proc0 : dnz = 2, o_nz = 2
3722      proc1 : dnz = 3, o_nz = 2
3723      proc2 : dnz = 1, o_nz = 4
3724 .ve
3725    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3726    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3727    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3728    34 values.
3729 
3730    When d_nnz, o_nnz parameters are specified, the storage is specified
3731    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3732    In the above case the values for d_nnz,o_nnz are:
3733 .vb
3734      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3735      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3736      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3737 .ve
3738    Here the space allocated is the sum of all the above values, i.e. 34, and
3739    hence the preallocation is perfect.
3740 
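   A minimal sketch of the corresponding call for the example above, where m is the number
   of local rows on this process (3, 3, or 2) and d_nnz/o_nnz hold the per-row counts above:

$     Mat A;
$     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
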
3741    Level: intermediate
3742 
3743 .keywords: matrix, aij, compressed row, sparse, parallel
3744 
3745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3746           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3747 @*/
3748 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3749 {
3750   PetscErrorCode ierr;
3751   PetscMPIInt    size;
3752 
3753   PetscFunctionBegin;
3754   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3755   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3756   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3757   if (size > 1) {
3758     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3759     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3760   } else {
3761     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3762     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3763   }
3764   PetscFunctionReturn(0);
3765 }
3766 
3767 #undef __FUNCT__
3768 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3769 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3770 {
3771   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3772   PetscBool      flg;
3773   PetscErrorCode ierr;
3774 
3775   PetscFunctionBegin;
3776   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3777   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3778   if (Ad)     *Ad     = a->A;
3779   if (Ao)     *Ao     = a->B;
3780   if (colmap) *colmap = a->garray;
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 #undef __FUNCT__
3785 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3786 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3787 {
3788   PetscErrorCode ierr;
3789   PetscInt       m,N,i,rstart,nnz,Ii;
3790   PetscInt       *indx;
3791   PetscScalar    *values;
3792 
3793   PetscFunctionBegin;
3794   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3795   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3796     PetscInt       *dnz,*onz,sum,bs,cbs;
3797 
3798     if (n == PETSC_DECIDE) {
3799       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3800     }
3801     /* Check sum(n) = N */
3802     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3803     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3804 
3805     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3806     rstart -= m;
3807 
3808     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3809     for (i=0; i<m; i++) {
3810       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3811       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3812       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3813     }
3814 
3815     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3816     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3817     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3818     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3819     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3820     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3821     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3822   }
3823 
3824   /* numeric phase */
3825   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3826   for (i=0; i<m; i++) {
3827     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3828     Ii   = i + rstart;
3829     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3830     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3831   }
3832   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 #undef __FUNCT__
3838 #define __FUNCT__ "MatFileSplit"
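/*
   MatFileSplit - copies the locally owned rows of A into a sequential matrix on each process and
   appends that matrix to the binary file <outfile>.<rank>, one file per MPI rank.
*/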
3839 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3840 {
3841   PetscErrorCode    ierr;
3842   PetscMPIInt       rank;
3843   PetscInt          m,N,i,rstart,nnz;
3844   size_t            len;
3845   const PetscInt    *indx;
3846   PetscViewer       out;
3847   char              *name;
3848   Mat               B;
3849   const PetscScalar *values;
3850 
3851   PetscFunctionBegin;
3852   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3853   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3854   /* Should this be the type of the diagonal block of A? */
3855   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3856   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3857   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3858   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3859   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3860   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3861   for (i=0; i<m; i++) {
3862     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3863     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3864     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3865   }
3866   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3867   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868 
3869   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3870   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3871   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3872   sprintf(name,"%s.%d",outfile,rank);
3873   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3874   ierr = PetscFree(name);CHKERRQ(ierr);
3875   ierr = MatView(B,out);CHKERRQ(ierr);
3876   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3877   ierr = MatDestroy(&B);CHKERRQ(ierr);
3878   PetscFunctionReturn(0);
3879 }
3880 
3881 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3882 #undef __FUNCT__
3883 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3884 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3885 {
3886   PetscErrorCode      ierr;
3887   Mat_Merge_SeqsToMPI *merge;
3888   PetscContainer      container;
3889 
3890   PetscFunctionBegin;
3891   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3892   if (container) {
3893     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3894     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3904     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3905     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3906     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3907     ierr = PetscFree(merge);CHKERRQ(ierr);
3908     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3909   }
3910   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 #include <../src/mat/utils/freespace.h>
3915 #include <petscbt.h>
3916 
3917 #undef __FUNCT__
3918 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
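/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ(): using the
   ij-structure and communication layout stored in the "MatMergeSeqsToMPI" container by
   MatCreateMPIAIJSumSeqAIJSymbolic(), each process sends the rows of its seqmat owned by other
   processes, accumulates the locally owned and received values row by row, and assembles mpimat.
*/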
3919 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3920 {
3921   PetscErrorCode      ierr;
3922   MPI_Comm            comm;
3923   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3924   PetscMPIInt         size,rank,taga,*len_s;
3925   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3926   PetscInt            proc,m;
3927   PetscInt            **buf_ri,**buf_rj;
3928   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3929   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3930   MPI_Request         *s_waits,*r_waits;
3931   MPI_Status          *status;
3932   MatScalar           *aa=a->a;
3933   MatScalar           **abuf_r,*ba_i;
3934   Mat_Merge_SeqsToMPI *merge;
3935   PetscContainer      container;
3936 
3937   PetscFunctionBegin;
3938   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3939   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3940 
3941   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3942   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3943 
3944   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3945   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3946 
3947   bi     = merge->bi;
3948   bj     = merge->bj;
3949   buf_ri = merge->buf_ri;
3950   buf_rj = merge->buf_rj;
3951 
3952   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3953   owners = merge->rowmap->range;
3954   len_s  = merge->len_s;
3955 
3956   /* send and recv matrix values */
3957   /*-----------------------------*/
3958   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3959   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3960 
3961   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3962   for (proc=0,k=0; proc<size; proc++) {
3963     if (!len_s[proc]) continue;
3964     i    = owners[proc];
3965     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3966     k++;
3967   }
3968 
3969   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3970   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3971   ierr = PetscFree(status);CHKERRQ(ierr);
3972 
3973   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3974   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3975 
3976   /* insert mat values of mpimat */
3977   /*----------------------------*/
3978   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3979   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3980 
3981   for (k=0; k<merge->nrecv; k++) {
3982     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3983     nrows       = *(buf_ri_k[k]);
3984     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
3985     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
3986   }
3987 
3988   /* set values of ba */
3989   m = merge->rowmap->n;
3990   for (i=0; i<m; i++) {
3991     arow = owners[rank] + i;
3992     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3993     bnzi = bi[i+1] - bi[i];
3994     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3995 
3996     /* add local non-zero vals of this proc's seqmat into ba */
3997     anzi   = ai[arow+1] - ai[arow];
3998     aj     = a->j + ai[arow];
3999     aa     = a->a + ai[arow];
4000     nextaj = 0;
4001     for (j=0; nextaj<anzi; j++) {
4002       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4003         ba_i[j] += aa[nextaj++];
4004       }
4005     }
4006 
4007     /* add received vals into ba */
4008     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4009       /* i-th row */
4010       if (i == *nextrow[k]) {
4011         anzi   = *(nextai[k]+1) - *nextai[k];
4012         aj     = buf_rj[k] + *(nextai[k]);
4013         aa     = abuf_r[k] + *(nextai[k]);
4014         nextaj = 0;
4015         for (j=0; nextaj<anzi; j++) {
4016           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4017             ba_i[j] += aa[nextaj++];
4018           }
4019         }
4020         nextrow[k]++; nextai[k]++;
4021       }
4022     }
4023     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4024   }
4025   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4026   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4027 
4028   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4029   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4030   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4031   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4032   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4033   PetscFunctionReturn(0);
4034 }
4035 
4036 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4037 
4038 #undef __FUNCT__
4039 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
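/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the
   row ownership, exchanges the ij-structure of the off-process rows of each seqmat, merges the
   column indices into the nonzero pattern of the parallel matrix, preallocates it, and stores the
   supporting Mat_Merge_SeqsToMPI data in a container attached to the result for the numeric phase.
*/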
4040 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4041 {
4042   PetscErrorCode      ierr;
4043   Mat                 B_mpi;
4044   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4045   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4046   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4047   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4048   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4049   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4050   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4051   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4052   MPI_Status          *status;
4053   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4054   PetscBT             lnkbt;
4055   Mat_Merge_SeqsToMPI *merge;
4056   PetscContainer      container;
4057 
4058   PetscFunctionBegin;
4059   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4060 
4061   /* make sure it is a PETSc comm */
4062   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4063   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4064   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4065 
4066   ierr = PetscNew(&merge);CHKERRQ(ierr);
4067   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4068 
4069   /* determine row ownership */
4070   /*---------------------------------------------------------*/
4071   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4072   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4073   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4074   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4075   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4076   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4077   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4078 
4079   m      = merge->rowmap->n;
4080   owners = merge->rowmap->range;
4081 
4082   /* determine the number of messages to send, their lengths */
4083   /*---------------------------------------------------------*/
4084   len_s = merge->len_s;
4085 
4086   len          = 0; /* length of buf_si[] */
4087   merge->nsend = 0;
4088   for (proc=0; proc<size; proc++) {
4089     len_si[proc] = 0;
4090     if (proc == rank) {
4091       len_s[proc] = 0;
4092     } else {
4093       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4094       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4095     }
4096     if (len_s[proc]) {
4097       merge->nsend++;
4098       nrows = 0;
4099       for (i=owners[proc]; i<owners[proc+1]; i++) {
4100         if (ai[i+1] > ai[i]) nrows++;
4101       }
4102       len_si[proc] = 2*(nrows+1);
4103       len         += len_si[proc];
4104     }
4105   }
4106 
4107   /* determine the number and length of messages to receive for ij-structure */
4108   /*-------------------------------------------------------------------------*/
4109   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4110   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4111 
4112   /* post the Irecv of j-structure */
4113   /*-------------------------------*/
4114   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4115   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4116 
4117   /* post the Isend of j-structure */
4118   /*--------------------------------*/
4119   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4120 
4121   for (proc=0, k=0; proc<size; proc++) {
4122     if (!len_s[proc]) continue;
4123     i    = owners[proc];
4124     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4125     k++;
4126   }
4127 
4128   /* receives and sends of j-structure are complete */
4129   /*------------------------------------------------*/
4130   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4131   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4132 
4133   /* send and recv i-structure */
4134   /*---------------------------*/
4135   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4136   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4137 
4138   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4139   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4140   for (proc=0,k=0; proc<size; proc++) {
4141     if (!len_s[proc]) continue;
4142     /* form outgoing message for i-structure:
4143          buf_si[0]:                 nrows to be sent
4144                [1:nrows]:           row index (local to the destination process)
4145                [nrows+1:2*nrows+1]: i-structure index
4146     */
4147     /*-------------------------------------------*/
4148     nrows       = len_si[proc]/2 - 1;
4149     buf_si_i    = buf_si + nrows+1;
4150     buf_si[0]   = nrows;
4151     buf_si_i[0] = 0;
4152     nrows       = 0;
4153     for (i=owners[proc]; i<owners[proc+1]; i++) {
4154       anzi = ai[i+1] - ai[i];
4155       if (anzi) {
4156         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4157         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4158         nrows++;
4159       }
4160     }
4161     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4162     k++;
4163     buf_si += len_si[proc];
4164   }
4165 
4166   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4167   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4168 
4169   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4170   for (i=0; i<merge->nrecv; i++) {
4171     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4172   }
4173 
4174   ierr = PetscFree(len_si);CHKERRQ(ierr);
4175   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4176   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4177   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4178   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4179   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4180   ierr = PetscFree(status);CHKERRQ(ierr);
4181 
4182   /* compute a local seq matrix in each processor */
4183   /*----------------------------------------------*/
4184   /* allocate bi array and free space for accumulating nonzero column info */
4185   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4186   bi[0] = 0;
4187 
4188   /* create and initialize a linked list */
4189   nlnk = N+1;
4190   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4191 
4192   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4193   len  = ai[owners[rank+1]] - ai[owners[rank]];
4194   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4195 
4196   current_space = free_space;
4197 
4198   /* determine symbolic info for each local row */
4199   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4200 
4201   for (k=0; k<merge->nrecv; k++) {
4202     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4203     nrows       = *buf_ri_k[k];
4204     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4205     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
4206   }
4207 
4208   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4209   len  = 0;
4210   for (i=0; i<m; i++) {
4211     bnzi = 0;
4212     /* add local non-zero cols of this proc's seqmat into lnk */
4213     arow  = owners[rank] + i;
4214     anzi  = ai[arow+1] - ai[arow];
4215     aj    = a->j + ai[arow];
4216     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4217     bnzi += nlnk;
4218     /* add received col data into lnk */
4219     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4220       if (i == *nextrow[k]) { /* i-th row */
4221         anzi  = *(nextai[k]+1) - *nextai[k];
4222         aj    = buf_rj[k] + *nextai[k];
4223         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4224         bnzi += nlnk;
4225         nextrow[k]++; nextai[k]++;
4226       }
4227     }
4228     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4229 
4230     /* if free space is not available, make more free space */
4231     if (current_space->local_remaining<bnzi) {
4232       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4233       nspacedouble++;
4234     }
4235     /* copy data into free space, then initialize lnk */
4236     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4237     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4238 
4239     current_space->array           += bnzi;
4240     current_space->local_used      += bnzi;
4241     current_space->local_remaining -= bnzi;
4242 
4243     bi[i+1] = bi[i] + bnzi;
4244   }
4245 
4246   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4247 
4248   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4249   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4250   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4251 
4252   /* create symbolic parallel matrix B_mpi */
4253   /*---------------------------------------*/
4254   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4255   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4256   if (n==PETSC_DECIDE) {
4257     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4258   } else {
4259     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4260   }
4261   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4262   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4263   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4264   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4265   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4266 
4267   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4268   B_mpi->assembled    = PETSC_FALSE;
4269   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4270   merge->bi           = bi;
4271   merge->bj           = bj;
4272   merge->buf_ri       = buf_ri;
4273   merge->buf_rj       = buf_rj;
4274   merge->coi          = NULL;
4275   merge->coj          = NULL;
4276   merge->owners_co    = NULL;
4277 
4278   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4279 
4280   /* attach the supporting struct to B_mpi for reuse */
4281   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4282   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4283   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4284   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4285   *mpimat = B_mpi;
4286 
4287   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4288   PetscFunctionReturn(0);
4289 }
4290 
4291 #undef __FUNCT__
4292 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4293 /*@C
4294       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4295                  matrices from each processor
4296 
4297     Collective on MPI_Comm
4298 
4299    Input Parameters:
4300 +    comm - the communicator the parallel matrix will live on
4301 .    seqmat - the input sequential matrix on each process
4302 .    m - number of local rows (or PETSC_DECIDE)
4303 .    n - number of local columns (or PETSC_DECIDE)
4304 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4305 
4306    Output Parameter:
4307 .    mpimat - the parallel matrix generated
4308 
4309     Level: advanced
4310 
4311    Notes:
4312      The dimensions of the sequential matrix in each processor MUST be the same.
4313      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4314      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
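4315 
4315      A typical call sequence (a sketch; variable names are illustrative) is
.vb
       ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
       /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
       ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve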
4315 @*/
4316 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4317 {
4318   PetscErrorCode ierr;
4319   PetscMPIInt    size;
4320 
4321   PetscFunctionBegin;
4322   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4323   if (size == 1) {
4324     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4325     if (scall == MAT_INITIAL_MATRIX) {
4326       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4327     } else {
4328       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4329     }
4330     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4331     PetscFunctionReturn(0);
4332   }
4333   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4334   if (scall == MAT_INITIAL_MATRIX) {
4335     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4336   }
4337   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4338   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4339   PetscFunctionReturn(0);
4340 }
4341 
4342 #undef __FUNCT__
4343 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4344 /*@
4345      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4346           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4347           with MatGetSize().
4348 
4349     Not Collective
4350 
4351    Input Parameters:
4352 +    A - the matrix
4353 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4354 
4355    Output Parameter:
4356 .    A_loc - the local sequential matrix generated
4357 
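     A minimal usage sketch (with MAT_INITIAL_MATRIX the caller owns A_loc and destroys it when done):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
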
4358     Level: developer
4359 
4360 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4361 
4362 @*/
4363 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4364 {
4365   PetscErrorCode ierr;
4366   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4367   Mat_SeqAIJ     *mat,*a,*b;
4368   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4369   MatScalar      *aa,*ba,*cam;
4370   PetscScalar    *ca;
4371   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4372   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4373   PetscBool      match;
4374   MPI_Comm       comm;
4375   PetscMPIInt    size;
4376 
4377   PetscFunctionBegin;
4378   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4379   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4380   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4381   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4382   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4383 
4384   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4385   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4386   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4387   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4388   aa = a->a; ba = b->a;
4389   if (scall == MAT_INITIAL_MATRIX) {
4390     if (size == 1) {
4391       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4392       PetscFunctionReturn(0);
4393     }
4394 
4395     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4396     ci[0] = 0;
4397     for (i=0; i<am; i++) {
4398       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4399     }
4400     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4401     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4402     k    = 0;
4403     for (i=0; i<am; i++) {
4404       ncols_o = bi[i+1] - bi[i];
4405       ncols_d = ai[i+1] - ai[i];
4406       /* off-diagonal portion of A */
4407       for (jo=0; jo<ncols_o; jo++) {
4408         col = cmap[*bj];
4409         if (col >= cstart) break;
4410         cj[k]   = col; bj++;
4411         ca[k++] = *ba++;
4412       }
4413       /* diagonal portion of A */
4414       for (j=0; j<ncols_d; j++) {
4415         cj[k]   = cstart + *aj++;
4416         ca[k++] = *aa++;
4417       }
4418       /* off-diagonal portion of A */
4419       for (j=jo; j<ncols_o; j++) {
4420         cj[k]   = cmap[*bj++];
4421         ca[k++] = *ba++;
4422       }
4423     }
4424     /* put together the new matrix */
4425     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4426     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4427     /* Since these are PETSc arrays, change flags to free them as necessary. */
4428     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4429     mat->free_a  = PETSC_TRUE;
4430     mat->free_ij = PETSC_TRUE;
4431     mat->nonew   = 0;
4432   } else if (scall == MAT_REUSE_MATRIX) {
4433     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4434     ci = mat->i; cj = mat->j; cam = mat->a;
4435     for (i=0; i<am; i++) {
4436       /* off-diagonal portion of A */
4437       ncols_o = bi[i+1] - bi[i];
4438       for (jo=0; jo<ncols_o; jo++) {
4439         col = cmap[*bj];
4440         if (col >= cstart) break;
4441         *cam++ = *ba++; bj++;
4442       }
4443       /* diagonal portion of A */
4444       ncols_d = ai[i+1] - ai[i];
4445       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4446       /* off-diagonal portion of A */
4447       for (j=jo; j<ncols_o; j++) {
4448         *cam++ = *ba++; bj++;
4449       }
4450     }
4451   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4452   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4453   PetscFunctionReturn(0);
4454 }
4455 
4456 #undef __FUNCT__
4457 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4458 /*@C
4459      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4460 
4461     Not Collective
4462 
4463    Input Parameters:
4464 +    A - the matrix
4465 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4466 -    row, col - index sets of rows and columns to extract (or NULL)
4467 
4468    Output Parameter:
4469 .    A_loc - the local sequential matrix generated
4470 
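     For example, to extract all local rows and all locally nonzero columns (a sketch):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
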
4471     Level: developer
4472 
4473 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4474 
4475 @*/
4476 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4477 {
4478   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4479   PetscErrorCode ierr;
4480   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4481   IS             isrowa,iscola;
4482   Mat            *aloc;
4483   PetscBool      match;
4484 
4485   PetscFunctionBegin;
4486   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4487   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4488   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4489   if (!row) {
4490     start = A->rmap->rstart; end = A->rmap->rend;
4491     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4492   } else {
4493     isrowa = *row;
4494   }
4495   if (!col) {
4496     start = A->cmap->rstart;
4497     cmap  = a->garray;
4498     nzA   = a->A->cmap->n;
4499     nzB   = a->B->cmap->n;
4500     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4501     ncols = 0;
4502     for (i=0; i<nzB; i++) {
4503       if (cmap[i] < start) idx[ncols++] = cmap[i];
4504       else break;
4505     }
4506     imark = i;
4507     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4508     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4509     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4510   } else {
4511     iscola = *col;
4512   }
4513   if (scall != MAT_INITIAL_MATRIX) {
4514     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4515     aloc[0] = *A_loc;
4516   }
4517   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4518   *A_loc = aloc[0];
4519   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4520   if (!row) {
4521     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4522   }
4523   if (!col) {
4524     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4525   }
4526   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4527   PetscFunctionReturn(0);
4528 }
4529 
4530 #undef __FUNCT__
4531 #define __FUNCT__ "MatGetBrowsOfAcols"
4532 /*@C
4533     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4534 
4535     Collective on Mat
4536 
4537    Input Parameters:
4538 +    A,B - the matrices in mpiaij format
4539 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4540 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4541 
4542    Output Parameters:
4543 +    rowb, colb - index sets of rows and columns of B to extract
4544 -    B_seq - the sequential matrix generated
4545 
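    A typical reuse pattern (a sketch; the index sets and B_seq created by the first call are reused by the second):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
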
4546     Level: developer
4547 
4548 @*/
4549 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4550 {
4551   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4552   PetscErrorCode ierr;
4553   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4554   IS             isrowb,iscolb;
4555   Mat            *bseq=NULL;
4556 
4557   PetscFunctionBegin;
4558   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4559     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4560   }
4561   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4562 
4563   if (scall == MAT_INITIAL_MATRIX) {
4564     start = A->cmap->rstart;
4565     cmap  = a->garray;
4566     nzA   = a->A->cmap->n;
4567     nzB   = a->B->cmap->n;
4568     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4569     ncols = 0;
4570     for (i=0; i<nzB; i++) {  /* row < local row index */
4571       if (cmap[i] < start) idx[ncols++] = cmap[i];
4572       else break;
4573     }
4574     imark = i;
4575     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4576     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4577     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4578     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4579   } else {
4580     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4581     isrowb  = *rowb; iscolb = *colb;
4582     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4583     bseq[0] = *B_seq;
4584   }
4585   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4586   *B_seq = bseq[0];
4587   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4588   if (!rowb) {
4589     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4590   } else {
4591     *rowb = isrowb;
4592   }
4593   if (!colb) {
4594     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4595   } else {
4596     *colb = iscolb;
4597   }
4598   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 #undef __FUNCT__
4603 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4604 /*
4605     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4606     of the OFF-DIAGONAL portion of local A
4607 
4608     Collective on Mat
4609 
4610    Input Parameters:
4611 +    A,B - the matrices in mpiaij format
4612 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4613 
4614    Output Parameters:
4615 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4616 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4617 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4618 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4619 
4620     Level: developer
4621 
4622 */
4623 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4624 {
4625   VecScatter_MPI_General *gen_to,*gen_from;
4626   PetscErrorCode         ierr;
4627   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4628   Mat_SeqAIJ             *b_oth;
4629   VecScatter             ctx =a->Mvctx;
4630   MPI_Comm               comm;
4631   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4632   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4633   PetscScalar            *rvalues,*svalues;
4634   MatScalar              *b_otha,*bufa,*bufA;
4635   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4636   MPI_Request            *rwaits = NULL,*swaits = NULL;
4637   MPI_Status             *sstatus,rstatus;
4638   PetscMPIInt            jj,size;
4639   PetscInt               *cols,sbs,rbs;
4640   PetscScalar            *vals;
4641 
4642   PetscFunctionBegin;
4643   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4644   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4645 
4646   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4647     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4648   }
4649   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4650   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4651 
4652   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4653   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4654   rvalues  = gen_from->values; /* holds the length of receiving row */
4655   svalues  = gen_to->values;   /* holds the length of sending row */
4656   nrecvs   = gen_from->n;
4657   nsends   = gen_to->n;
4658 
4659   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4660   srow    = gen_to->indices;    /* local row index to be sent */
4661   sstarts = gen_to->starts;
4662   sprocs  = gen_to->procs;
4663   sstatus = gen_to->sstatus;
4664   sbs     = gen_to->bs;
4665   rstarts = gen_from->starts;
4666   rprocs  = gen_from->procs;
4667   rbs     = gen_from->bs;
4668 
4669   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4670   if (scall == MAT_INITIAL_MATRIX) {
4671     /* i-array */
4672     /*---------*/
4673     /*  post receives */
4674     for (i=0; i<nrecvs; i++) {
4675       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4676       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4677       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4678     }
4679 
4680     /* pack the outgoing message */
4681     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4682 
4683     sstartsj[0] = 0;
4684     rstartsj[0] = 0;
4685     len         = 0; /* total length of j or a array to be sent */
4686     k           = 0;
4687     for (i=0; i<nsends; i++) {
4688       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4689       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4690       for (j=0; j<nrows; j++) {
4691         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4692         for (l=0; l<sbs; l++) {
4693           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4694 
4695           rowlen[j*sbs+l] = ncols;
4696 
4697           len += ncols;
4698           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4699         }
4700         k++;
4701       }
4702       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4703 
4704       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4705     }
4706     /* recvs and sends of i-array are completed */
4707     i = nrecvs;
4708     while (i--) {
4709       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4710     }
4711     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4712 
4713     /* allocate buffers for sending j and a arrays */
4714     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4715     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4716 
4717     /* create i-array of B_oth */
4718     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4719 
4720     b_othi[0] = 0;
4721     len       = 0; /* total length of j or a array to be received */
4722     k         = 0;
4723     for (i=0; i<nrecvs; i++) {
4724       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4725       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4726       for (j=0; j<nrows; j++) {
4727         b_othi[k+1] = b_othi[k] + rowlen[j];
4728         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4729         k++;
4730       }
4731       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4732     }
4733 
4734     /* allocate space for the j and a arrays of B_oth */
4735     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4736     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4737 
4738     /* j-array */
4739     /*---------*/
4740     /*  post receives of j-array */
4741     for (i=0; i<nrecvs; i++) {
4742       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4743       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4744     }
4745 
4746     /* pack the outgoing message j-array */
4747     k = 0;
4748     for (i=0; i<nsends; i++) {
4749       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4750       bufJ  = bufj+sstartsj[i];
4751       for (j=0; j<nrows; j++) {
4752         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4753         for (ll=0; ll<sbs; ll++) {
4754           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4755           for (l=0; l<ncols; l++) {
4756             *bufJ++ = cols[l];
4757           }
4758           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4759         }
4760       }
4761       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4762     }
4763 
4764     /* recvs and sends of j-array are completed */
4765     i = nrecvs;
4766     while (i--) {
4767       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4768     }
4769     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4770   } else if (scall == MAT_REUSE_MATRIX) {
4771     sstartsj = *startsj_s;
4772     rstartsj = *startsj_r;
4773     bufa     = *bufa_ptr;
4774     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4775     b_otha   = b_oth->a;
4776   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4777 
4778   /* a-array */
4779   /*---------*/
4780   /*  post receives of a-array */
4781   for (i=0; i<nrecvs; i++) {
4782     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4783     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4784   }
4785 
4786   /* pack the outgoing message a-array */
4787   k = 0;
4788   for (i=0; i<nsends; i++) {
4789     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4790     bufA  = bufa+sstartsj[i];
4791     for (j=0; j<nrows; j++) {
4792       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4793       for (ll=0; ll<sbs; ll++) {
4794         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4795         for (l=0; l<ncols; l++) {
4796           *bufA++ = vals[l];
4797         }
4798         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4799       }
4800     }
4801     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4802   }
4803   /* recvs and sends of a-array are completed */
4804   i = nrecvs;
4805   while (i--) {
4806     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4807   }
4808   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4809   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4810 
4811   if (scall == MAT_INITIAL_MATRIX) {
4812     /* put together the new matrix */
4813     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4814 
4815     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4816     /* Since these are PETSc arrays, change flags to free them as necessary. */
4817     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4818     b_oth->free_a  = PETSC_TRUE;
4819     b_oth->free_ij = PETSC_TRUE;
4820     b_oth->nonew   = 0;
4821 
4822     ierr = PetscFree(bufj);CHKERRQ(ierr);
4823     if (!startsj_s || !bufa_ptr) {
4824       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4825       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so release it here */
4826     } else {
4827       *startsj_s = sstartsj;
4828       *startsj_r = rstartsj;
4829       *bufa_ptr  = bufa;
4830     }
4831   }
4832   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4833   PetscFunctionReturn(0);
4834 }
4835 
4836 #undef __FUNCT__
4837 #define __FUNCT__ "MatGetCommunicationStructs"
4838 /*@C
4839   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4840 
4841   Not Collective
4842 
4843   Input Parameters:
4844 . A - The matrix in mpiaij format
4845 
4846   Output Parameters:
4847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4848 . colmap - A map from global column index to local index into lvec
4849 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4850 
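  For example (a sketch; all three output pointers must be valid):
.vb
    Vec        lvec;
    VecScatter mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
.ve
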
4851   Level: developer
4852 
4853 @*/
4854 #if defined(PETSC_USE_CTABLE)
4855 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4856 #else
4857 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4858 #endif
4859 {
4860   Mat_MPIAIJ *a;
4861 
4862   PetscFunctionBegin;
4863   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4864   PetscValidPointer(lvec, 2);
4865   PetscValidPointer(colmap, 3);
4866   PetscValidPointer(multScatter, 4);
4867   a = (Mat_MPIAIJ*) A->data;
4868   if (lvec) *lvec = a->lvec;
4869   if (colmap) *colmap = a->colmap;
4870   if (multScatter) *multScatter = a->Mvctx;
4871   PetscFunctionReturn(0);
4872 }
4873 
4874 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4875 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4877 #if defined(PETSC_HAVE_ELEMENTAL)
4878 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4879 #endif
4880 
4881 #undef __FUNCT__
4882 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4883 /*
4884     Computes (B'*A')' since computing B*A directly is untenable
4885 
4886                n                       p                          p
4887         (              )       (              )         (                  )
4888       m (      A       )  *  n (       B      )   =   m (         C        )
4889         (              )       (              )         (                  )
4890 
4891 */
4892 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4893 {
4894   PetscErrorCode ierr;
4895   Mat            At,Bt,Ct;
4896 
4897   PetscFunctionBegin;
4898   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4899   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4900   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4901   ierr = MatDestroy(&At);CHKERRQ(ierr);
4902   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4903   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4904   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4905   PetscFunctionReturn(0);
4906 }
4907 
4908 #undef __FUNCT__
4909 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4910 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4911 {
4912   PetscErrorCode ierr;
4913   PetscInt       m=A->rmap->n,n=B->cmap->n;
4914   Mat            Cmat;
4915 
4916   PetscFunctionBegin;
4917   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4918   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4919   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4920   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4921   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4922   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4923   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4924   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4925 
4926   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4927 
4928   *C = Cmat;
4929   PetscFunctionReturn(0);
4930 }
4931 
4932 /* ----------------------------------------------------------------*/
4933 #undef __FUNCT__
4934 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4935 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4936 {
4937   PetscErrorCode ierr;
4938 
4939   PetscFunctionBegin;
4940   if (scall == MAT_INITIAL_MATRIX) {
4941     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4942     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4943     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4944   }
4945   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4946   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4947   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4948   PetscFunctionReturn(0);
4949 }
4950 
4951 /*MC
4952    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4953 
4954    Options Database Keys:
4955 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4956 
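  A matrix of this type is usually built with a sequence like the following (a sketch; this is what MatCreateAIJ() does internally on multiple processes):
.vb
    ierr = MatCreate(comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
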
4957   Level: beginner
4958 
4959 .seealso: MatCreateAIJ()
4960 M*/
4961 
4962 #undef __FUNCT__
4963 #define __FUNCT__ "MatCreate_MPIAIJ"
4964 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4965 {
4966   Mat_MPIAIJ     *b;
4967   PetscErrorCode ierr;
4968   PetscMPIInt    size;
4969 
4970   PetscFunctionBegin;
4971   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4972 
4973   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4974   B->data       = (void*)b;
4975   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4976   B->assembled  = PETSC_FALSE;
4977   B->insertmode = NOT_SET_VALUES;
4978   b->size       = size;
4979 
4980   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4981 
4982   /* build a stash used to cache off-process entries until assembly */
4983   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4984 
4985   b->donotstash  = PETSC_FALSE;
4986   b->colmap      = 0;
4987   b->garray      = 0;
4988   b->roworiented = PETSC_TRUE;
4989 
4990   /* stuff used for matrix vector multiply */
4991   b->lvec  = NULL;
4992   b->Mvctx = NULL;
4993 
4994   /* stuff for MatGetRow() */
4995   b->rowindices   = 0;
4996   b->rowvalues    = 0;
4997   b->getrowactive = PETSC_FALSE;
4998 
4999   /* flexible pointer used in CUSP/CUSPARSE classes */
5000   b->spptr = NULL;
5001 
5002   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5004   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5012 #if defined(PETSC_HAVE_ELEMENTAL)
5013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5014 #endif
5015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5018   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5024 /*@C
5025      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5026          and "off-diagonal" part of the matrix in CSR format.
5027 
5028    Collective on MPI_Comm
5029 
5030    Input Parameters:
5031 +  comm - MPI communicator
5032 .  m - number of local rows (Cannot be PETSC_DECIDE)
5033 .  n - This value should be the same as the local size used in creating the
5034        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5035        it calculated if N is given). For square matrices n is almost always m.
5036 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5037 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5038 .   i - row indices for "diagonal" portion of matrix
5039 .   j - column indices
5040 .   a - matrix values
5041 .   oi - row indices for "off-diagonal" portion of matrix
5042 .   oj - column indices
5043 -   oa - matrix values
5044 
5045    Output Parameter:
5046 .   mat - the matrix
5047 
5048    Level: advanced
5049 
5050    Notes:
5051        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5052        must free the arrays once the matrix has been destroyed and not before.
5053 
5054        The i and j indices are 0 based
5055 
5056        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5057 
5058        This sets local rows and cannot be used to set off-processor values.
5059 
5060        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5061        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5062        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5063        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5064        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5065        communication if it is known that only local entries will be set.
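5066 
5066        A schematic call (a sketch; i,j,a hold the CSR of the "diagonal" block and oi,oj,oa the CSR of the
5066        "off-diagonal" block, the latter with global column indices, all assembled by the caller):
.vb
       ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve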
5066 
5067 .keywords: matrix, aij, compressed row, sparse, parallel
5068 
5069 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5070           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5071 @*/
5072 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5073 {
5074   PetscErrorCode ierr;
5075   Mat_MPIAIJ     *maij;
5076 
5077   PetscFunctionBegin;
5078   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5079   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5080   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5081   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5082   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5083   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5084   maij = (Mat_MPIAIJ*) (*mat)->data;
5085 
5086   (*mat)->preallocated = PETSC_TRUE;
5087 
5088   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5089   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5090 
5091   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5092   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5093 
5094   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5095   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5096   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5097   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5098 
5099   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5100   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5101   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5102   PetscFunctionReturn(0);
5103 }
5104 
5105 /*
5106     Special version for direct calls from Fortran
5107 */
5108 #include <petsc/private/fortranimpl.h>
5109 
5110 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5111 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5112 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5113 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5114 #endif
5115 
5116 /* Change these macros so they can be used in a void (Fortran-callable) function: errors abort instead of returning an error code */
5117 #undef CHKERRQ
5118 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5119 #undef SETERRQ2
5120 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5121 #undef SETERRQ3
5122 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5123 #undef SETERRQ
5124 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5125 
5126 #undef __FUNCT__
5127 #define __FUNCT__ "matsetvaluesmpiaij_"
5128 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5129 {
5130   Mat            mat  = *mmat;
5131   PetscInt       m    = *mm, n = *mn;
5132   InsertMode     addv = *maddv;
5133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5134   PetscScalar    value;
5135   PetscErrorCode ierr;
5136 
5137   MatCheckPreallocated(mat,1);
5138   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5139 
5140 #if defined(PETSC_USE_DEBUG)
5141   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5142 #endif
5143   {
5144     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5145     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5146     PetscBool roworiented = aij->roworiented;
5147 
5148     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5149     Mat        A                 = aij->A;
5150     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5151     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5152     MatScalar  *aa               = a->a;
5153     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5154     Mat        B                 = aij->B;
5155     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5156     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5157     MatScalar  *ba               = b->a;
5158 
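    /* rp1/ap1 and rp2/ap2 point at the current row's column indices and values in A and B; low/high delimit the binary search windows used by the insertion macros */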
5159     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5160     PetscInt  nonew = a->nonew;
5161     MatScalar *ap1,*ap2;
5162 
5163     PetscFunctionBegin;
5164     for (i=0; i<m; i++) {
5165       if (im[i] < 0) continue;
5166 #if defined(PETSC_USE_DEBUG)
5167       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5168 #endif
5169       if (im[i] >= rstart && im[i] < rend) {
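        /* this row is owned by this process: insert directly into the diagonal (A) and off-diagonal (B) blocks */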
5170         row      = im[i] - rstart;
5171         lastcol1 = -1;
5172         rp1      = aj + ai[row];
5173         ap1      = aa + ai[row];
5174         rmax1    = aimax[row];
5175         nrow1    = ailen[row];
5176         low1     = 0;
5177         high1    = nrow1;
5178         lastcol2 = -1;
5179         rp2      = bj + bi[row];
5180         ap2      = ba + bi[row];
5181         rmax2    = bimax[row];
5182         nrow2    = bilen[row];
5183         low2     = 0;
5184         high2    = nrow2;
5185 
5186         for (j=0; j<n; j++) {
5187           if (roworiented) value = v[i*n+j];
5188           else value = v[i+j*m];
5189           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5190           if (in[j] >= cstart && in[j] < cend) {
5191             col = in[j] - cstart;
5192             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5193           } else if (in[j] < 0) continue;
5194 #if defined(PETSC_USE_DEBUG)
5195           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5196 #endif
5197           else {
5198             if (mat->was_assembled) {
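              /* B of an assembled matrix uses a compacted column numbering; translate the global column index through the colmap */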
5199               if (!aij->colmap) {
5200                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5201               }
5202 #if defined(PETSC_USE_CTABLE)
5203               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5204               col--;
5205 #else
5206               col = aij->colmap[in[j]] - 1;
5207 #endif
5208               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5209                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5210                 col  =  in[j];
5211                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5212                 B     = aij->B;
5213                 b     = (Mat_SeqAIJ*)B->data;
5214                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5215                 ba    = b->a; /* refresh ba before computing ap2: MatDisAssemble_MPIAIJ() replaced B, so the old value array was freed */
5216                 rp2   = bj + bi[row];
5217                 ap2   = ba + bi[row];
5218                 rmax2 = bimax[row];
5219                 nrow2 = bilen[row];
5220                 low2  = 0;
5221                 high2 = nrow2;
5222                 bm    = aij->B->rmap->n;
5223               }
5224             } else col = in[j];
5225             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5226           }
5227         }
5228       } else if (!aij->donotstash) {
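        /* off-process row: stash the values; they are communicated during MatAssemblyBegin()/MatAssemblyEnd() */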
5229         if (roworiented) {
5230           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5231         } else {
5232           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5233         }
5234       }
5235     }
5236   }
5237   PetscFunctionReturnVoid();
5238 }
5239 
5240