xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2bf68e3e0f2a61f71e7c65bee250bfa1c8ce0cdb)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity; a minimal call sequence is sketched in the example below.
17 
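   Example:
   A minimal creation sequence that works for both one and several MPI processes (a sketch only; the
   sizes m,n,M,N and the preallocation counts nz,dnz,onz are placeholders, and error checking is omitted):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,nz,NULL);
     MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);
.ve
   Only the preallocation call that matches the actual (Seq or MPI) type has an effect; the other call
   is ignored, which is why calling both is safe.
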
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
22    switches over to use inodes when enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
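   Example:
   A run-time switch to this format for any code that calls MatSetFromOptions() (a sketch; ./prog
   stands for an arbitrary PETSc program):
.vb
     mpiexec -n 4 ./prog -mat_type aijcrl
.ve
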
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
63 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
64 {
65   PetscErrorCode  ierr;
66   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
67   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
68   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
69   const PetscInt  *ia,*ib;
70   const MatScalar *aa,*bb;
71   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
72   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
73 
74   PetscFunctionBegin;
75   *keptrows = 0;
76   ia        = a->i;
77   ib        = b->i;
78   for (i=0; i<m; i++) {
79     na = ia[i+1] - ia[i];
80     nb = ib[i+1] - ib[i];
81     if (!na && !nb) {
82       cnt++;
83       goto ok1;
84     }
85     aa = a->a + ia[i];
86     for (j=0; j<na; j++) {
87       if (aa[j] != 0.0) goto ok1;
88     }
89     bb = b->a + ib[i];
90     for (j=0; j <nb; j++) {
91       if (bb[j] != 0.0) goto ok1;
92     }
93     cnt++;
94 ok1:;
95   }
96   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
97   if (!n0rows) PetscFunctionReturn(0);
98   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
99   cnt  = 0;
100   for (i=0; i<m; i++) {
101     na = ia[i+1] - ia[i];
102     nb = ib[i+1] - ib[i];
103     if (!na && !nb) continue;
104     aa = a->a + ia[i];
105     for (j=0; j<na;j++) {
106       if (aa[j] != 0.0) {
107         rows[cnt++] = rstart + i;
108         goto ok2;
109       }
110     }
111     bb = b->a + ib[i];
112     for (j=0; j<nb; j++) {
113       if (bb[j] != 0.0) {
114         rows[cnt++] = rstart + i;
115         goto ok2;
116       }
117     }
118 ok2:;
119   }
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
126 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
127 {
128   PetscErrorCode    ierr;
129   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
130 
131   PetscFunctionBegin;
132   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
133     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
134   } else {
135     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
136   }
137   PetscFunctionReturn(0);
138 }
139 
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
143 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
144 {
145   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
146   PetscErrorCode ierr;
147   PetscInt       i,rstart,nrows,*rows;
148 
149   PetscFunctionBegin;
150   *zrows = NULL;
151   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
152   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
153   for (i=0; i<nrows; i++) rows[i] += rstart;
154   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
160 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
161 {
162   PetscErrorCode ierr;
163   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
164   PetscInt       i,n,*garray = aij->garray;
165   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
166   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
167   PetscReal      *work;
168 
169   PetscFunctionBegin;
170   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
171   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
172   if (type == NORM_2) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
178     }
179   } else if (type == NORM_1) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
185     }
186   } else if (type == NORM_INFINITY) {
187     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
188       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
189     }
190     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
191       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
192     }
193 
194   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
195   if (type == NORM_INFINITY) {
196     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
197   } else {
198     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
199   }
200   ierr = PetscFree(work);CHKERRQ(ierr);
201   if (type == NORM_2) {
202     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
203   }
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
209 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
210 {
211   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
212   IS              sis,gis;
213   PetscErrorCode  ierr;
214   const PetscInt  *isis,*igis;
215   PetscInt        n,*iis,nsis,ngis,rstart,i;
216 
217   PetscFunctionBegin;
218   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
219   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
220   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
221   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
222   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
223   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
224 
225   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
226   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
227   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
228   n    = ngis + nsis;
229   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
230   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
231   for (i=0; i<n; i++) iis[i] += rstart;
232   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
233 
234   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
235   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
236   ierr = ISDestroy(&sis);CHKERRQ(ierr);
237   ierr = ISDestroy(&gis);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatDistribute_MPIAIJ"
243 /*
244     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
245     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
246 
247     Only for square matrices
248 
249     Used by a preconditioner, hence PETSC_EXTERN
250 */
251 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
252 {
253   PetscMPIInt    rank,size;
254   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
255   PetscErrorCode ierr;
256   Mat            mat;
257   Mat_SeqAIJ     *gmata;
258   PetscMPIInt    tag;
259   MPI_Status     status;
260   PetscBool      aij;
261   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
262 
263   PetscFunctionBegin;
264   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
265   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
266   if (!rank) {
267     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
268     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
269   }
270   if (reuse == MAT_INITIAL_MATRIX) {
271     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
272     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
273     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
274     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
275     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
276     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
277     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
278     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
279     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
280 
281     rowners[0] = 0;
282     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
283     rstart = rowners[rank];
284     rend   = rowners[rank+1];
285     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
286     if (!rank) {
287       gmata = (Mat_SeqAIJ*) gmat->data;
288       /* send row lengths to all processors */
289       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
290       for (i=1; i<size; i++) {
291         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292       }
293       /* determine the number of diagonal and off-diagonal entries in each row */
294       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
295       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
296       jj   = 0;
297       for (i=0; i<m; i++) {
298         for (j=0; j<dlens[i]; j++) {
299           if (gmata->j[jj] < rstart) ld[i]++;
300           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
301           jj++;
302         }
303       }
304       /* send column indices to other processes */
305       for (i=1; i<size; i++) {
306         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
307         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
308         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
309       }
310 
311       /* send numerical values to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
315       }
316       gmataa = gmata->a;
317       gmataj = gmata->j;
318 
319     } else {
320       /* receive row lengths */
321       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
322       /* receive column indices */
323       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
324       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
325       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
326       /* determine the number of diagonal and off-diagonal entries in each row */
327       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
328       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
329       jj   = 0;
330       for (i=0; i<m; i++) {
331         for (j=0; j<dlens[i]; j++) {
332           if (gmataj[jj] < rstart) ld[i]++;
333           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
334           jj++;
335         }
336       }
337       /* receive numerical values */
338       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
339       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
340     }
341     /* set preallocation */
342     for (i=0; i<m; i++) {
343       dlens[i] -= olens[i];
344     }
345     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
346     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
347 
348     for (i=0; i<m; i++) {
349       dlens[i] += olens[i];
350     }
351     cnt = 0;
352     for (i=0; i<m; i++) {
353       row  = rstart + i;
354       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
355       cnt += dlens[i];
356     }
357     if (rank) {
358       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
359     }
360     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
361     ierr = PetscFree(rowners);CHKERRQ(ierr);
362 
363     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
364 
365     *inmat = mat;
366   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
367     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
368     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
369     mat  = *inmat;
370     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
371     if (!rank) {
372       /* send numerical values to other processes */
373       gmata  = (Mat_SeqAIJ*) gmat->data;
374       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
375       gmataa = gmata->a;
376       for (i=1; i<size; i++) {
377         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
378         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
379       }
380       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
381     } else {
382       /* receive numerical values from process 0 */
383       nz   = Ad->nz + Ao->nz;
384       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
385       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
386     }
387     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
388     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
389     ad = Ad->a;
390     ao = Ao->a;
391     if (mat->rmap->n) {
392       i  = 0;
393       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
394       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
395     }
396     for (i=1; i<mat->rmap->n; i++) {
397       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
398       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
399     }
400     i--;
401     if (mat->rmap->n) {
402       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
403     }
404     if (rank) {
405       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
406     }
407   }
408   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
409   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 /*
414   Local utility routine that creates a mapping from the global column
415 number to the local number in the off-diagonal part of the local
416 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
417 a slightly higher hash-table lookup cost; without it, it is not scalable (each processor
418 has an order-N integer array) but access is fast.
419 */
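/*
   The lookup that consumes this colmap (see MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below)
   then follows this pattern (a sketch only; gcol and lcol are placeholder names; the +1/-1 shift
   lets a result of 0 mean "global column not present in the local off-diagonal part"):

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/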
420 #undef __FUNCT__
421 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
422 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
423 {
424   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
425   PetscErrorCode ierr;
426   PetscInt       n = aij->B->cmap->n,i;
427 
428   PetscFunctionBegin;
429   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
430 #if defined(PETSC_USE_CTABLE)
431   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
432   for (i=0; i<n; i++) {
433     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
434   }
435 #else
436   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
438   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
439 #endif
440   PetscFunctionReturn(0);
441 }
442 
443 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
444 { \
445     if (col <= lastcol1)  low1 = 0;     \
446     else                 high1 = nrow1; \
447     lastcol1 = col;\
448     while (high1-low1 > 5) { \
449       t = (low1+high1)/2; \
450       if (rp1[t] > col) high1 = t; \
451       else              low1  = t; \
452     } \
453       for (_i=low1; _i<high1; _i++) { \
454         if (rp1[_i] > col) break; \
455         if (rp1[_i] == col) { \
456           if (addv == ADD_VALUES) ap1[_i] += value;   \
457           else                    ap1[_i] = value; \
458           goto a_noinsert; \
459         } \
460       }  \
461       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
462       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
463       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
464       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
465       N = nrow1++ - 1; a->nz++; high1++; \
466       /* shift up all the later entries in this row */ \
467       for (ii=N; ii>=_i; ii--) { \
468         rp1[ii+1] = rp1[ii]; \
469         ap1[ii+1] = ap1[ii]; \
470       } \
471       rp1[_i] = col;  \
472       ap1[_i] = value;  \
473       A->nonzerostate++;\
474       a_noinsert: ; \
475       ailen[row] = nrow1; \
476 }
477 
478 
479 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
480   { \
481     if (col <= lastcol2) low2 = 0;                        \
482     else high2 = nrow2;                                   \
483     lastcol2 = col;                                       \
484     while (high2-low2 > 5) {                              \
485       t = (low2+high2)/2;                                 \
486       if (rp2[t] > col) high2 = t;                        \
487       else             low2  = t;                         \
488     }                                                     \
489     for (_i=low2; _i<high2; _i++) {                       \
490       if (rp2[_i] > col) break;                           \
491       if (rp2[_i] == col) {                               \
492         if (addv == ADD_VALUES) ap2[_i] += value;         \
493         else                    ap2[_i] = value;          \
494         goto b_noinsert;                                  \
495       }                                                   \
496     }                                                     \
497     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
498     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
499     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
500     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
501     N = nrow2++ - 1; b->nz++; high2++;                    \
502     /* shift up all the later entries in this row */      \
503     for (ii=N; ii>=_i; ii--) {                            \
504       rp2[ii+1] = rp2[ii];                                \
505       ap2[ii+1] = ap2[ii];                                \
506     }                                                     \
507     rp2[_i] = col;                                        \
508     ap2[_i] = value;                                      \
509     B->nonzerostate++;                                    \
510     b_noinsert: ;                                         \
511     bilen[row] = nrow2;                                   \
512   }
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
516 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
517 {
518   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
519   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
520   PetscErrorCode ierr;
521   PetscInt       l,*garray = mat->garray,diag;
522 
523   PetscFunctionBegin;
524   /* code only works for square matrices A */
525 
526   /* find size of row to the left of the diagonal part */
527   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
528   row  = row - diag;
529   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
530     if (garray[b->j[b->i[row]+l]] > diag) break;
531   }
532   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
533 
534   /* diagonal part */
535   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
536 
537   /* right of diagonal part */
538   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "MatSetValues_MPIAIJ"
544 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
545 {
546   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
547   PetscScalar    value;
548   PetscErrorCode ierr;
549   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
550   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
551   PetscBool      roworiented = aij->roworiented;
552 
553   /* Some Variables required in the macro */
554   Mat        A                 = aij->A;
555   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
556   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
557   MatScalar  *aa               = a->a;
558   PetscBool  ignorezeroentries = a->ignorezeroentries;
559   Mat        B                 = aij->B;
560   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
561   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
562   MatScalar  *ba               = b->a;
563 
564   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
565   PetscInt  nonew;
566   MatScalar *ap1,*ap2;
567 
568   PetscFunctionBegin;
569   for (i=0; i<m; i++) {
570     if (im[i] < 0) continue;
571 #if defined(PETSC_USE_DEBUG)
572     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
573 #endif
574     if (im[i] >= rstart && im[i] < rend) {
575       row      = im[i] - rstart;
576       lastcol1 = -1;
577       rp1      = aj + ai[row];
578       ap1      = aa + ai[row];
579       rmax1    = aimax[row];
580       nrow1    = ailen[row];
581       low1     = 0;
582       high1    = nrow1;
583       lastcol2 = -1;
584       rp2      = bj + bi[row];
585       ap2      = ba + bi[row];
586       rmax2    = bimax[row];
587       nrow2    = bilen[row];
588       low2     = 0;
589       high2    = nrow2;
590 
591       for (j=0; j<n; j++) {
592         if (roworiented) value = v[i*n+j];
593         else             value = v[i+j*m];
594         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
595         if (in[j] >= cstart && in[j] < cend) {
596           col   = in[j] - cstart;
597           nonew = a->nonew;
598           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
599         } else if (in[j] < 0) continue;
600 #if defined(PETSC_USE_DEBUG)
601         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
602 #endif
603         else {
604           if (mat->was_assembled) {
605             if (!aij->colmap) {
606               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
607             }
608 #if defined(PETSC_USE_CTABLE)
609             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
610             col--;
611 #else
612             col = aij->colmap[in[j]] - 1;
613 #endif
614             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
615               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
616               col  =  in[j];
617               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
618               B     = aij->B;
619               b     = (Mat_SeqAIJ*)B->data;
620               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
621               rp2   = bj + bi[row];
622               ap2   = ba + bi[row];
623               rmax2 = bimax[row];
624               nrow2 = bilen[row];
625               low2  = 0;
626               high2 = nrow2;
627               bm    = aij->B->rmap->n;
628               ba    = b->a;
629             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
630           } else col = in[j];
631           nonew = b->nonew;
632           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
633         }
634       }
635     } else {
636       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
637       if (!aij->donotstash) {
638         mat->assembled = PETSC_FALSE;
639         if (roworiented) {
640           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
641         } else {
642           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
643         }
644       }
645     }
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatGetValues_MPIAIJ"
652 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
653 {
654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
655   PetscErrorCode ierr;
656   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
657   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
658 
659   PetscFunctionBegin;
660   for (i=0; i<m; i++) {
661     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
662     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
663     if (idxm[i] >= rstart && idxm[i] < rend) {
664       row = idxm[i] - rstart;
665       for (j=0; j<n; j++) {
666         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
667         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
668         if (idxn[j] >= cstart && idxn[j] < cend) {
669           col  = idxn[j] - cstart;
670           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
671         } else {
672           if (!aij->colmap) {
673             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
674           }
675 #if defined(PETSC_USE_CTABLE)
676           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
677           col--;
678 #else
679           col = aij->colmap[idxn[j]] - 1;
680 #endif
681           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
682           else {
683             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
684           }
685         }
686       }
687     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
688   }
689   PetscFunctionReturn(0);
690 }
691 
692 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
696 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
697 {
698   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
699   PetscErrorCode ierr;
700   PetscInt       nstash,reallocs;
701 
702   PetscFunctionBegin;
703   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
704 
705   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
706   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
707   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
713 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
714 {
715   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
716   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
717   PetscErrorCode ierr;
718   PetscMPIInt    n;
719   PetscInt       i,j,rstart,ncols,flg;
720   PetscInt       *row,*col;
721   PetscBool      other_disassembled;
722   PetscScalar    *val;
723 
724   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
725 
726   PetscFunctionBegin;
727   if (!aij->donotstash && !mat->nooffprocentries) {
728     while (1) {
729       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
730       if (!flg) break;
731 
732       for (i=0; i<n; ) {
733         /* Now identify the consecutive vals belonging to the same row */
734         for (j=i,rstart=row[j]; j<n; j++) {
735           if (row[j] != rstart) break;
736         }
737         if (j < n) ncols = j-i;
738         else       ncols = n-i;
739         /* Now assemble all these values with a single function call */
740         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
741 
742         i = j;
743       }
744     }
745     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
746   }
747   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
748   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
749 
750   /* determine if any processor has disassembled; if so we must
751      also disassemble ourselves, in order that we may reassemble. */
752   /*
753      if the nonzero structure of submatrix B cannot change then we know that
754      no processor disassembled, thus we can skip this step
755   */
756   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
757     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
758     if (mat->was_assembled && !other_disassembled) {
759       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
760     }
761   }
762   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
763     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
764   }
765   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
766   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
767   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
768 
769   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
770 
771   aij->rowvalues = 0;
772 
773   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
774   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
775 
776   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
777   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
778     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
779     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
786 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
787 {
788   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
789   PetscErrorCode ierr;
790 
791   PetscFunctionBegin;
792   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
793   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatZeroRows_MPIAIJ"
799 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
800 {
801   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
802   PetscInt      *lrows;
803   PetscInt       r, len;
804   PetscErrorCode ierr;
805 
806   PetscFunctionBegin;
807   /* get locally owned rows */
808   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
809   /* fix right hand side if needed */
810   if (x && b) {
811     const PetscScalar *xx;
812     PetscScalar       *bb;
813 
814     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
815     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
816     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
817     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
818     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
819   }
820   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
821   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
822   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
823     PetscBool cong;
824     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
825     if (cong) A->congruentlayouts = 1;
826     else      A->congruentlayouts = 0;
827   }
828   if ((diag != 0.0) && A->congruentlayouts) {
829     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
830   } else if (diag != 0.0) {
831     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
832     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
833     for (r = 0; r < len; ++r) {
834       const PetscInt row = lrows[r] + A->rmap->rstart;
835       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
836     }
837     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839   } else {
840     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
841   }
842   ierr = PetscFree(lrows);CHKERRQ(ierr);
843 
844   /* only change matrix nonzero state if pattern was allowed to be changed */
845   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
846     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
847     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
848   }
849   PetscFunctionReturn(0);
850 }
851 
852 #undef __FUNCT__
853 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
854 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
855 {
856   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
857   PetscErrorCode    ierr;
858   PetscMPIInt       n = A->rmap->n;
859   PetscInt          i,j,r,m,p = 0,len = 0;
860   PetscInt          *lrows,*owners = A->rmap->range;
861   PetscSFNode       *rrows;
862   PetscSF           sf;
863   const PetscScalar *xx;
864   PetscScalar       *bb,*mask;
865   Vec               xmask,lmask;
866   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
867   const PetscInt    *aj, *ii,*ridx;
868   PetscScalar       *aa;
869 
870   PetscFunctionBegin;
871   /* Create SF where leaves are input rows and roots are owned rows */
872   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
873   for (r = 0; r < n; ++r) lrows[r] = -1;
874   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
875   for (r = 0; r < N; ++r) {
876     const PetscInt idx   = rows[r];
877     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
878     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
879       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
880     }
881     rrows[r].rank  = p;
882     rrows[r].index = rows[r] - owners[p];
883   }
884   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
885   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
886   /* Collect flags for rows to be zeroed */
887   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
888   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
889   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
890   /* Compress and put in row numbers */
891   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
892   /* zero diagonal part of matrix */
893   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
894   /* handle off diagonal part of matrix */
895   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
896   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
897   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
898   for (i=0; i<len; i++) bb[lrows[i]] = 1;
899   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
900   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
902   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
903   if (x) {
904     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
907     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
908   }
909   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
910   /* remove zeroed rows of off diagonal matrix */
911   ii = aij->i;
912   for (i=0; i<len; i++) {
913     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
914   }
915   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
916   if (aij->compressedrow.use) {
917     m    = aij->compressedrow.nrows;
918     ii   = aij->compressedrow.i;
919     ridx = aij->compressedrow.rindex;
920     for (i=0; i<m; i++) {
921       n  = ii[i+1] - ii[i];
922       aj = aij->j + ii[i];
923       aa = aij->a + ii[i];
924 
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[*ridx] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933       ridx++;
934     }
935   } else { /* do not use compressed row format */
936     m = l->B->rmap->n;
937     for (i=0; i<m; i++) {
938       n  = ii[i+1] - ii[i];
939       aj = aij->j + ii[i];
940       aa = aij->a + ii[i];
941       for (j=0; j<n; j++) {
942         if (PetscAbsScalar(mask[*aj])) {
943           if (b) bb[i] -= *aa*xx[*aj];
944           *aa = 0.0;
945         }
946         aa++;
947         aj++;
948       }
949     }
950   }
951   if (x) {
952     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
953     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
954   }
955   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
956   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
957   ierr = PetscFree(lrows);CHKERRQ(ierr);
958 
959   /* only change matrix nonzero state if pattern was allowed to be changed */
960   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
961     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
962     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
963   }
964   PetscFunctionReturn(0);
965 }
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatMult_MPIAIJ"
969 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
970 {
971   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
972   PetscErrorCode ierr;
973   PetscInt       nt;
974 
975   PetscFunctionBegin;
976   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
977   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
978   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
979   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
980   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
988 {
989   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatMultAdd_MPIAIJ"
999 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1000 {
1001   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1002   PetscErrorCode ierr;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1007   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1014 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018   PetscBool      merged;
1019 
1020   PetscFunctionBegin;
1021   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1022   /* do nondiagonal part */
1023   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1024   if (!merged) {
1025     /* send it on its way */
1026     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1027     /* do local part */
1028     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1029     /* receive remote parts: note this assumes the values are not actually */
1030     /* added into yy until the next line */
1031     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1032   } else {
1033     /* do local part */
1034     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1035     /* send it on its way */
1036     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1037     /* values actually were received in the Begin() but we need to call this nop */
1038     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1039   }
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1045 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1046 {
1047   MPI_Comm       comm;
1048   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1049   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1050   IS             Me,Notme;
1051   PetscErrorCode ierr;
1052   PetscInt       M,N,first,last,*notme,i;
1053   PetscMPIInt    size;
1054 
1055   PetscFunctionBegin;
1056   /* Easy test: symmetric diagonal block */
1057   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1058   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1059   if (!*f) PetscFunctionReturn(0);
1060   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1062   if (size == 1) PetscFunctionReturn(0);
1063 
1064   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1065   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1066   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1067   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1068   for (i=0; i<first; i++) notme[i] = i;
1069   for (i=last; i<M; i++) notme[i-last+first] = i;
1070   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1071   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1072   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1073   Aoff = Aoffs[0];
1074   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1075   Boff = Boffs[0];
1076   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1077   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1078   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1079   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1080   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1081   ierr = PetscFree(notme);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1087 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090   PetscErrorCode ierr;
1091 
1092   PetscFunctionBegin;
1093   /* do nondiagonal part */
1094   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1095   /* send it on its way */
1096   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1097   /* do local part */
1098   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1099   /* receive remote parts */
1100   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 /*
1105   This only works correctly for square matrices where the subblock A->A is the
1106    diagonal block
1107 */
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1110 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1111 {
1112   PetscErrorCode ierr;
1113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1114 
1115   PetscFunctionBegin;
1116   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1117   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1118   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatScale_MPIAIJ"
1124 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1131   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatDestroy_MPIAIJ"
1137 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1138 {
1139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143 #if defined(PETSC_USE_LOG)
1144   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1145 #endif
1146   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1147   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1148   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1149   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1150 #if defined(PETSC_USE_CTABLE)
1151   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1152 #else
1153   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1154 #endif
1155   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1156   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1157   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1158   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1159   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1160   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1161 
1162   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1170 #if defined(PETSC_HAVE_ELEMENTAL)
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1172 #endif
1173 #if defined(PETSC_HAVE_HYPRE)
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1176 #endif
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1182 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1183 {
1184   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1185   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1186   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1187   PetscErrorCode ierr;
1188   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1189   int            fd;
1190   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1191   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1192   PetscScalar    *column_values;
1193   PetscInt       message_count,flowcontrolcount;
1194   FILE           *file;
1195 
1196   PetscFunctionBegin;
1197   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1198   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1199   nz   = A->nz + B->nz;
1200   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1201   if (!rank) {
1202     header[0] = MAT_FILE_CLASSID;
1203     header[1] = mat->rmap->N;
1204     header[2] = mat->cmap->N;
1205 
1206     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1207     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1208     /* get largest number of rows any processor has */
1209     rlen  = mat->rmap->n;
1210     range = mat->rmap->range;
1211     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1212   } else {
1213     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1214     rlen = mat->rmap->n;
1215   }
1216 
1217   /* load up the local row counts */
1218   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1219   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1220 
1221   /* store the row lengths to the file */
1222   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1223   if (!rank) {
1224     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     for (i=1; i<size; i++) {
1226       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1227       rlen = range[i+1] - range[i];
1228       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1229       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     }
1231     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1232   } else {
1233     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1234     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1236   }
1237   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1238 
1239   /* load up the local column indices */
1240   nzmax = nz; /* process 0 needs a buffer as large as the largest number of nonzeros on any process */
1241   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1242   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1243   cnt   = 0;
1244   for (i=0; i<mat->rmap->n; i++) {
1245     for (j=B->i[i]; j<B->i[i+1]; j++) {
1246       if ((col = garray[B->j[j]]) > cstart) break;
1247       column_indices[cnt++] = col;
1248     }
1249     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1250     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1251   }
1252   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1253 
1254   /* store the column indices to the file */
1255   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1256   if (!rank) {
1257     MPI_Status status;
1258     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1259     for (i=1; i<size; i++) {
1260       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1261       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1262       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1263       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1264       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1265     }
1266     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1267   } else {
1268     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1269     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1272   }
1273   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1274 
1275   /* load up the local column values */
1276   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1277   cnt  = 0;
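  /* emit the values in exactly the same order as the column indices assembled above */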
1278   for (i=0; i<mat->rmap->n; i++) {
1279     for (j=B->i[i]; j<B->i[i+1]; j++) {
1280       if (garray[B->j[j]] > cstart) break;
1281       column_values[cnt++] = B->a[j];
1282     }
1283     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1284     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1285   }
1286   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1287 
1288   /* store the column values to the file */
1289   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1290   if (!rank) {
1291     MPI_Status status;
1292     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1293     for (i=1; i<size; i++) {
1294       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1295       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1297       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1299     }
1300     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1301   } else {
1302     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1303     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1304     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1305     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1306   }
1307   ierr = PetscFree(column_values);CHKERRQ(ierr);
1308 
1309   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1310   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #include <petscdraw.h>
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1317 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1318 {
1319   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1320   PetscErrorCode    ierr;
1321   PetscMPIInt       rank = aij->rank,size = aij->size;
1322   PetscBool         isdraw,iascii,isbinary;
1323   PetscViewer       sviewer;
1324   PetscViewerFormat format;
1325 
1326   PetscFunctionBegin;
1327   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1328   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1329   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1330   if (iascii) {
1331     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1332     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1333       MatInfo   info;
      PetscInt  *inodes;
1335 
1336       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1337       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1339       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1340       if (!inodes) {
1341         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1342                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1343       } else {
1344         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1345                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1346       }
1347       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1348       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1349       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1351       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1352       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1354       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1355       PetscFunctionReturn(0);
1356     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1357       PetscInt inodecount,inodelimit,*inodes;
1358       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1359       if (inodes) {
1360         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1361       } else {
1362         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1363       }
1364       PetscFunctionReturn(0);
1365     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1366       PetscFunctionReturn(0);
1367     }
1368   } else if (isbinary) {
1369     if (size == 1) {
1370       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1371       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1372     } else {
1373       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1374     }
1375     PetscFunctionReturn(0);
1376   } else if (isdraw) {
1377     PetscDraw draw;
1378     PetscBool isnull;
1379     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1380     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1381     if (isnull) PetscFunctionReturn(0);
1382   }
1383 
1384   {
    /* assemble the entire matrix onto the first processor */
1386     Mat        A;
1387     Mat_SeqAIJ *Aloc;
1388     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1389     MatScalar  *a;
1390 
1391     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1392     if (!rank) {
1393       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1394     } else {
1395       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1396     }
1397     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1398     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1399     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1400     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1401     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1402 
1403     /* copy over the A part */
1404     Aloc = (Mat_SeqAIJ*)aij->A->data;
1405     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1406     row  = mat->rmap->rstart;
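    /* temporarily shift the local column indices of the diagonal block to global indices so that
       MatSetValues() can be used; the shift is undone once the rows have been inserted */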
1407     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1408     for (i=0; i<m; i++) {
1409       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1410       row++;
1411       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1412     }
1413     aj = Aloc->j;
1414     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1415 
1416     /* copy over the B part */
1417     Aloc = (Mat_SeqAIJ*)aij->B->data;
1418     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1419     row  = mat->rmap->rstart;
1420     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1421     ct   = cols;
1422     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1423     for (i=0; i<m; i++) {
1424       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1425       row++;
1426       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1427     }
1428     ierr = PetscFree(ct);CHKERRQ(ierr);
1429     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1430     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1431     /*
1432        Everyone has to call to draw the matrix since the graphics waits are
1433        synchronized across all processors that share the PetscDraw object
1434     */
1435     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1436     if (!rank) {
1437       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1438       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1439     }
1440     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1441     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1442     ierr = MatDestroy(&A);CHKERRQ(ierr);
1443   }
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatView_MPIAIJ"
1449 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1450 {
1451   PetscErrorCode ierr;
1452   PetscBool      iascii,isdraw,issocket,isbinary;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1457   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1458   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1459   if (iascii || isdraw || isbinary || issocket) {
1460     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1461   }
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatSOR_MPIAIJ"
1467 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1468 {
1469   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1470   PetscErrorCode ierr;
1471   Vec            bb1 = 0;
1472   PetscBool      hasop;
1473 
1474   PetscFunctionBegin;
1475   if (flag == SOR_APPLY_UPPER) {
1476     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1477     PetscFunctionReturn(0);
1478   }
1479 
1480   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
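    /* a work vector for the modified right-hand side is needed unless a single local sweep with a
       zero initial guess (and no Eisenstat trick) was requested; note that ~flag & SOR_ZERO_INITIAL_GUESS
       is a bitwise test that the zero-initial-guess bit is NOT set in flag */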
1481     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1482   }
1483 
1484   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1485     if (flag & SOR_ZERO_INITIAL_GUESS) {
1486       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487       its--;
1488     }
1489 
1490     while (its--) {
1491       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493 
1494       /* update rhs: bb1 = bb - B*x */
1495       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1496       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1497 
1498       /* local sweep */
1499       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1500     }
1501   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1502     if (flag & SOR_ZERO_INITIAL_GUESS) {
1503       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1504       its--;
1505     }
1506     while (its--) {
1507       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509 
1510       /* update rhs: bb1 = bb - B*x */
1511       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1512       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1513 
1514       /* local sweep */
1515       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1516     }
1517   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1518     if (flag & SOR_ZERO_INITIAL_GUESS) {
1519       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1520       its--;
1521     }
1522     while (its--) {
1523       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525 
1526       /* update rhs: bb1 = bb - B*x */
1527       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1528       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1529 
1530       /* local sweep */
1531       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1532     }
1533   } else if (flag & SOR_EISENSTAT) {
1534     Vec xx1;
1535 
1536     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1537     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1538 
1539     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541     if (!mat->diag) {
1542       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1543       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1544     }
1545     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1546     if (hasop) {
1547       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1548     } else {
1549       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1550     }
1551     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1552 
1553     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1554 
1555     /* local sweep */
1556     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1557     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1558     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1559   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1560 
1561   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1562 
1563   matin->factorerrortype = mat->A->factorerrortype;
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatPermute_MPIAIJ"
1569 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1570 {
1571   Mat            aA,aB,Aperm;
1572   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1573   PetscScalar    *aa,*ba;
1574   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1575   PetscSF        rowsf,sf;
1576   IS             parcolp = NULL;
1577   PetscBool      done;
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
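  /* Strategy: use star forests (PetscSF) to invert the row and column permutations, count the
     permuted diagonal and off-diagonal nonzeros per target row for preallocation, and finally
     insert the permuted entries with MatSetValues() */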
1581   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1582   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1583   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1584   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1585 
1586   /* Invert row permutation to find out where my rows should go */
1587   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1588   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1589   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1590   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1591   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1592   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1593 
1594   /* Invert column permutation to find out where my columns should go */
1595   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1596   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1597   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1598   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1599   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1600   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1602 
1603   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1604   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1605   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1606 
1607   /* Find out where my gcols should go */
1608   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1609   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1610   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1611   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1612   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1613   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1614   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1615   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1616 
1617   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1618   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1619   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1620   for (i=0; i<m; i++) {
1621     PetscInt row = rdest[i],rowner;
1622     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1623     for (j=ai[i]; j<ai[i+1]; j++) {
1624       PetscInt cowner,col = cdest[aj[j]];
1625       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1626       if (rowner == cowner) dnnz[i]++;
1627       else onnz[i]++;
1628     }
1629     for (j=bi[i]; j<bi[i+1]; j++) {
1630       PetscInt cowner,col = gcdest[bj[j]];
1631       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1632       if (rowner == cowner) dnnz[i]++;
1633       else onnz[i]++;
1634     }
1635   }
1636   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1637   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1638   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1639   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1640   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1641 
1642   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1643   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1644   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1645   for (i=0; i<m; i++) {
1646     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1647     PetscInt j0,rowlen;
1648     rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen may exceed m, and the work arrays only hold m entries, so insert in batches of at most m */
1650       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1651       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1652     }
1653     rowlen = bi[i+1] - bi[i];
1654     for (j0=j=0; j<rowlen; j0=j) {
1655       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1656       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1657     }
1658   }
1659   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1660   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1661   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1662   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1663   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1664   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1665   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1666   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1667   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1668   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1669   *B = Aperm;
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1675 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1676 {
1677   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1682   if (ghosts) *ghosts = aij->garray;
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1688 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1689 {
1690   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1691   Mat            A    = mat->A,B = mat->B;
1692   PetscErrorCode ierr;
1693   PetscReal      isend[5],irecv[5];
1694 
1695   PetscFunctionBegin;
1696   info->block_size = 1.0;
1697   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1698 
1699   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1700   isend[3] = info->memory;  isend[4] = info->mallocs;
1701 
1702   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1703 
1704   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1705   isend[3] += info->memory;  isend[4] += info->mallocs;
1706   if (flag == MAT_LOCAL) {
1707     info->nz_used      = isend[0];
1708     info->nz_allocated = isend[1];
1709     info->nz_unneeded  = isend[2];
1710     info->memory       = isend[3];
1711     info->mallocs      = isend[4];
1712   } else if (flag == MAT_GLOBAL_MAX) {
1713     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1714 
1715     info->nz_used      = irecv[0];
1716     info->nz_allocated = irecv[1];
1717     info->nz_unneeded  = irecv[2];
1718     info->memory       = irecv[3];
1719     info->mallocs      = irecv[4];
1720   } else if (flag == MAT_GLOBAL_SUM) {
1721     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1722 
1723     info->nz_used      = irecv[0];
1724     info->nz_allocated = irecv[1];
1725     info->nz_unneeded  = irecv[2];
1726     info->memory       = irecv[3];
1727     info->mallocs      = irecv[4];
1728   }
1729   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1730   info->fill_ratio_needed = 0;
1731   info->factor_mallocs    = 0;
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatSetOption_MPIAIJ"
1737 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1738 {
1739   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   switch (op) {
1744   case MAT_NEW_NONZERO_LOCATIONS:
1745   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1746   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1747   case MAT_KEEP_NONZERO_PATTERN:
1748   case MAT_NEW_NONZERO_LOCATION_ERR:
1749   case MAT_USE_INODES:
1750   case MAT_IGNORE_ZERO_ENTRIES:
1751     MatCheckPreallocated(A,1);
1752     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1753     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1754     break;
1755   case MAT_ROW_ORIENTED:
1756     MatCheckPreallocated(A,1);
1757     a->roworiented = flg;
1758 
1759     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1760     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_NEW_DIAGONALS:
1763     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1764     break;
1765   case MAT_IGNORE_OFF_PROC_ENTRIES:
1766     a->donotstash = flg;
1767     break;
1768   case MAT_SPD:
1769     A->spd_set = PETSC_TRUE;
1770     A->spd     = flg;
1771     if (flg) {
1772       A->symmetric                  = PETSC_TRUE;
1773       A->structurally_symmetric     = PETSC_TRUE;
1774       A->symmetric_set              = PETSC_TRUE;
1775       A->structurally_symmetric_set = PETSC_TRUE;
1776     }
1777     break;
1778   case MAT_SYMMETRIC:
1779     MatCheckPreallocated(A,1);
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     break;
1782   case MAT_STRUCTURALLY_SYMMETRIC:
1783     MatCheckPreallocated(A,1);
1784     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1785     break;
1786   case MAT_HERMITIAN:
1787     MatCheckPreallocated(A,1);
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_SYMMETRY_ETERNAL:
1791     MatCheckPreallocated(A,1);
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     break;
1794   case MAT_SUBMAT_SINGLEIS:
1795     A->submat_singleis = flg;
1796     break;
1797   default:
1798     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatGetRow_MPIAIJ"
1805 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1806 {
1807   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1808   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1809   PetscErrorCode ierr;
1810   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1811   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1812   PetscInt       *cmap,*idx_p;
1813 
1814   PetscFunctionBegin;
1815   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1816   mat->getrowactive = PETSC_TRUE;
1817 
1818   if (!mat->rowvalues && (idx || v)) {
1819     /*
1820         allocate enough space to hold information from the longest row.
1821     */
1822     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1823     PetscInt   max = 1,tmp;
1824     for (i=0; i<matin->rmap->n; i++) {
1825       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1826       if (max < tmp) max = tmp;
1827     }
1828     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1829   }
1830 
1831   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1832   lrow = row - rstart;
1833 
1834   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1835   if (!v)   {pvA = 0; pvB = 0;}
1836   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1837   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1838   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1839   nztot = nzA + nzB;
1840 
1841   cmap = mat->garray;
1842   if (v  || idx) {
1843     if (nztot) {
1844       /* Sort by increasing column numbers, assuming A and B already sorted */
1845       PetscInt imark = -1;
1846       if (v) {
1847         *v = v_p = mat->rowvalues;
1848         for (i=0; i<nzB; i++) {
1849           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1850           else break;
1851         }
1852         imark = i;
1853         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1854         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1855       }
1856       if (idx) {
1857         *idx = idx_p = mat->rowindices;
1858         if (imark > -1) {
1859           for (i=0; i<imark; i++) {
1860             idx_p[i] = cmap[cworkB[i]];
1861           }
1862         } else {
1863           for (i=0; i<nzB; i++) {
1864             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1865             else break;
1866           }
1867           imark = i;
1868         }
1869         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1870         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1871       }
1872     } else {
1873       if (idx) *idx = 0;
1874       if (v)   *v   = 0;
1875     }
1876   }
1877   *nz  = nztot;
1878   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1879   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1885 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1886 {
1887   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1888 
1889   PetscFunctionBegin;
1890   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1891   aij->getrowactive = PETSC_FALSE;
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatNorm_MPIAIJ"
1897 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1898 {
1899   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1900   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1901   PetscErrorCode ierr;
1902   PetscInt       i,j,cstart = mat->cmap->rstart;
1903   PetscReal      sum = 0.0;
1904   MatScalar      *v;
1905 
1906   PetscFunctionBegin;
1907   if (aij->size == 1) {
1908     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1909   } else {
1910     if (type == NORM_FROBENIUS) {
1911       v = amat->a;
1912       for (i=0; i<amat->nz; i++) {
1913         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1914       }
1915       v = bmat->a;
1916       for (i=0; i<bmat->nz; i++) {
1917         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1918       }
1919       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1920       *norm = PetscSqrtReal(*norm);
1921       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1922     } else if (type == NORM_1) { /* max column norm */
1923       PetscReal *tmp,*tmp2;
1924       PetscInt  *jj,*garray = aij->garray;
1925       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1926       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1927       *norm = 0.0;
1928       v     = amat->a; jj = amat->j;
1929       for (j=0; j<amat->nz; j++) {
1930         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1931       }
1932       v = bmat->a; jj = bmat->j;
1933       for (j=0; j<bmat->nz; j++) {
1934         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1935       }
1936       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937       for (j=0; j<mat->cmap->N; j++) {
1938         if (tmp2[j] > *norm) *norm = tmp2[j];
1939       }
1940       ierr = PetscFree(tmp);CHKERRQ(ierr);
1941       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1942       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1943     } else if (type == NORM_INFINITY) { /* max row norm */
1944       PetscReal ntemp = 0.0;
1945       for (j=0; j<aij->A->rmap->n; j++) {
1946         v   = amat->a + amat->i[j];
1947         sum = 0.0;
1948         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1949           sum += PetscAbsScalar(*v); v++;
1950         }
1951         v = bmat->a + bmat->i[j];
1952         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1953           sum += PetscAbsScalar(*v); v++;
1954         }
1955         if (sum > ntemp) ntemp = sum;
1956       }
1957       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1958       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1959     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatTranspose_MPIAIJ"
1966 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1967 {
1968   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1969   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1970   PetscErrorCode ierr;
1971   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1972   PetscInt       cstart = A->cmap->rstart,ncol;
1973   Mat            B;
1974   MatScalar      *array;
1975 
1976   PetscFunctionBegin;
1977   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1978 
1979   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1980   ai = Aloc->i; aj = Aloc->j;
1981   bi = Bloc->i; bj = Bloc->j;
1982   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1983     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1984     PetscSFNode          *oloc;
1985     PETSC_UNUSED PetscSF sf;
1986 
1987     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
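    /* Preallocation of the transpose: every local column of A becomes a row of the transpose owned
       by this process, so count how often each diagonal-block column appears (d_nnz) and reduce the
       per-ghost-column counts onto the owning processes with a star forest to obtain the
       off-diagonal counts (o_nnz) */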
1988     /* compute d_nnz for preallocation */
1989     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1990     for (i=0; i<ai[ma]; i++) {
1991       d_nnz[aj[i]]++;
1992       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1993     }
1994     /* compute local off-diagonal contributions */
1995     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1996     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1997     /* map those to global */
1998     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1999     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2000     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2001     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2002     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2003     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2004     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2005 
2006     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2007     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2008     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2009     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2010     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2011     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2012   } else {
2013     B    = *matout;
2014     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2015     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2016   }
2017 
2018   /* copy over the A part */
2019   array = Aloc->a;
2020   row   = A->rmap->rstart;
2021   for (i=0; i<ma; i++) {
2022     ncol = ai[i+1]-ai[i];
2023     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2024     row++;
2025     array += ncol; aj += ncol;
2026   }
2027   aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2029 
2030   /* copy over the B part */
2031   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2032   array = Bloc->a;
2033   row   = A->rmap->rstart;
2034   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2035   cols_tmp = cols;
2036   for (i=0; i<mb; i++) {
2037     ncol = bi[i+1]-bi[i];
2038     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2039     row++;
2040     array += ncol; cols_tmp += ncol;
2041   }
2042   ierr = PetscFree(cols);CHKERRQ(ierr);
2043 
2044   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2045   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2046   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2047     *matout = B;
2048   } else {
2049     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2050   }
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2056 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2057 {
2058   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2059   Mat            a    = aij->A,b = aij->B;
2060   PetscErrorCode ierr;
2061   PetscInt       s1,s2,s3;
2062 
2063   PetscFunctionBegin;
2064   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2065   if (rr) {
2066     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2067     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2068     /* Overlap communication with computation. */
2069     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2070   }
2071   if (ll) {
2072     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2073     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2074     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2075   }
  /* scale the diagonal block */
2077   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2078 
2079   if (rr) {
2080     /* Do a scatter end and then right scale the off-diagonal block */
2081     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2082     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2089 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2090 {
2091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2092   PetscErrorCode ierr;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatEqual_MPIAIJ"
2101 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2102 {
2103   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2104   Mat            a,b,c,d;
2105   PetscBool      flg;
2106   PetscErrorCode ierr;
2107 
2108   PetscFunctionBegin;
2109   a = matA->A; b = matA->B;
2110   c = matB->A; d = matB->B;
2111 
2112   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2113   if (flg) {
2114     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2115   }
2116   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 #undef __FUNCT__
2121 #define __FUNCT__ "MatCopy_MPIAIJ"
2122 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2126   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2127 
2128   PetscFunctionBegin;
2129   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2130   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* Because of the column compression in the off-process part a->B, the number of columns in
       a->B and b->B may differ, so MatCopy() cannot be called directly on the two parts. If need
       be, a copy more efficient than MatCopy_Basic() could be provided by first uncompressing
       the a->B matrices and then copying the submatrices */
2136     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2137   } else {
2138     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2139     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2140   }
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatSetUp_MPIAIJ"
2146 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2147 {
2148   PetscErrorCode ierr;
2149 
2150   PetscFunctionBegin;
2151   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 /*
2156    Computes the number of nonzeros per row needed for preallocation when X and Y
2157    have different nonzero structure.
2158 */
2159 #undef __FUNCT__
2160 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2161 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2162 {
2163   PetscInt       i,j,k,nzx,nzy;
2164 
2165   PetscFunctionBegin;
2166   /* Set the number of nonzeros in the new matrix */
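  /* each row count is the size of the union of the two sorted global column lists, obtained by a
     two-pointer merge: columns of Y preceding the current column of X, the column of X itself
     (skipping an exact duplicate in Y), and finally any trailing columns of Y */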
2167   for (i=0; i<m; i++) {
2168     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2169     nzx = xi[i+1] - xi[i];
2170     nzy = yi[i+1] - yi[i];
2171     nnz[i] = 0;
2172     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2173       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2174       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2175       nnz[i]++;
2176     }
2177     for (; k<nzy; k++) nnz[i]++;
2178   }
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2183 #undef __FUNCT__
2184 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2185 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2186 {
2187   PetscErrorCode ierr;
2188   PetscInt       m = Y->rmap->N;
2189   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2190   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2191 
2192   PetscFunctionBegin;
2193   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 #undef __FUNCT__
2198 #define __FUNCT__ "MatAXPY_MPIAIJ"
2199 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2200 {
2201   PetscErrorCode ierr;
2202   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2203   PetscBLASInt   bnz,one=1;
2204   Mat_SeqAIJ     *x,*y;
2205 
2206   PetscFunctionBegin;
2207   if (str == SAME_NONZERO_PATTERN) {
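    /* with identical nonzero patterns the value arrays of X and Y are stored in the same order,
       so the update reduces to y->a += alpha*x->a on both the diagonal (A) and off-diagonal (B) blocks */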
2208     PetscScalar alpha = a;
2209     x    = (Mat_SeqAIJ*)xx->A->data;
2210     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2211     y    = (Mat_SeqAIJ*)yy->A->data;
2212     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2213     x    = (Mat_SeqAIJ*)xx->B->data;
2214     y    = (Mat_SeqAIJ*)yy->B->data;
2215     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2216     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2217     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2218   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2219     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2220   } else {
2221     Mat      B;
2222     PetscInt *nnz_d,*nnz_o;
2223     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2224     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2225     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2226     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2227     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2228     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2229     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2230     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2231     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2232     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2233     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2234     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2235     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2236     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2237   }
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatConjugate_MPIAIJ"
2245 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2246 {
2247 #if defined(PETSC_USE_COMPLEX)
2248   PetscErrorCode ierr;
2249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2253   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2254 #else
2255   PetscFunctionBegin;
2256 #endif
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatRealPart_MPIAIJ"
2262 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2263 {
2264   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2265   PetscErrorCode ierr;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2269   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2275 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2276 {
2277   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2282   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2288 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2289 {
2290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292   PetscInt       i,*idxb = 0;
2293   PetscScalar    *va,*vb;
2294   Vec            vtmp;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2298   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2299   if (idx) {
2300     for (i=0; i<A->rmap->n; i++) {
2301       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2302     }
2303   }
2304 
2305   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2306   if (idx) {
2307     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2308   }
2309   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2310   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2311 
2312   for (i=0; i<A->rmap->n; i++) {
2313     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2314       va[i] = vb[i];
2315       if (idx) idx[i] = a->garray[idxb[i]];
2316     }
2317   }
2318 
2319   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2320   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2321   ierr = PetscFree(idxb);CHKERRQ(ierr);
2322   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2328 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2329 {
2330   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2331   PetscErrorCode ierr;
2332   PetscInt       i,*idxb = 0;
2333   PetscScalar    *va,*vb;
2334   Vec            vtmp;
2335 
2336   PetscFunctionBegin;
2337   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2338   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2339   if (idx) {
    for (i=0; i<A->rmap->n; i++) {
2341       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2342     }
2343   }
2344 
2345   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2346   if (idx) {
2347     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2348   }
2349   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2350   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2351 
2352   for (i=0; i<A->rmap->n; i++) {
2353     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2354       va[i] = vb[i];
2355       if (idx) idx[i] = a->garray[idxb[i]];
2356     }
2357   }
2358 
2359   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2360   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2361   ierr = PetscFree(idxb);CHKERRQ(ierr);
2362   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2368 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2369 {
2370   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2371   PetscInt       n      = A->rmap->n;
2372   PetscInt       cstart = A->cmap->rstart;
2373   PetscInt       *cmap  = mat->garray;
2374   PetscInt       *diagIdx, *offdiagIdx;
2375   Vec            diagV, offdiagV;
2376   PetscScalar    *a, *diagA, *offdiagA;
2377   PetscInt       r;
2378   PetscErrorCode ierr;
2379 
2380   PetscFunctionBegin;
2381   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2384   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2385   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2386   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2387   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2388   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2389   for (r = 0; r < n; ++r) {
2390     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2391       a[r]   = diagA[r];
2392       idx[r] = cstart + diagIdx[r];
2393     } else {
2394       a[r]   = offdiagA[r];
2395       idx[r] = cmap[offdiagIdx[r]];
2396     }
2397   }
2398   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2399   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2400   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2401   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2402   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2403   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2409 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2410 {
2411   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2412   PetscInt       n      = A->rmap->n;
2413   PetscInt       cstart = A->cmap->rstart;
2414   PetscInt       *cmap  = mat->garray;
2415   PetscInt       *diagIdx, *offdiagIdx;
2416   Vec            diagV, offdiagV;
2417   PetscScalar    *a, *diagA, *offdiagA;
2418   PetscInt       r;
2419   PetscErrorCode ierr;
2420 
2421   PetscFunctionBegin;
2422   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2423   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2424   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2425   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2426   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2427   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2428   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2429   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2430   for (r = 0; r < n; ++r) {
2431     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2432       a[r]   = diagA[r];
2433       idx[r] = cstart + diagIdx[r];
2434     } else {
2435       a[r]   = offdiagA[r];
2436       idx[r] = cmap[offdiagIdx[r]];
2437     }
2438   }
2439   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2440   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2441   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2442   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2443   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2444   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2445   PetscFunctionReturn(0);
2446 }
2447 
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2450 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2451 {
2452   PetscErrorCode ierr;
2453   Mat            *dummy;
2454 
2455   PetscFunctionBegin;
2456   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2457   *newmat = *dummy;
2458   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2464 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2465 {
2466   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2467   PetscErrorCode ierr;
2468 
2469   PetscFunctionBegin;
2470   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2471   A->factorerrortype = a->A->factorerrortype;
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2477 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2478 {
2479   PetscErrorCode ierr;
2480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2481 
2482   PetscFunctionBegin;
2483   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2484   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2485   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2486   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2492 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2493 {
2494   PetscFunctionBegin;
2495   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2496   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2502 /*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2504 
2505    Collective on Mat
2506 
2507    Input Parameters:
2508 +    A - the matrix
-    sc - PETSC_TRUE to use the scalable algorithm (by default the non-scalable algorithm is used)
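
   Notes:
   A minimal usage sketch; nis and isarray below are illustrative placeholders for an existing
   number of index sets and their array, not variables defined in this file:
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,nis,isarray,1);CHKERRQ(ierr);  /* the overlap is now computed with the scalable algorithm */
.ve
   The same choice can be made at runtime with the option -mat_increase_overlap_scalable.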
2510 
   Level: advanced
2512 
2513 @*/
2514 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2515 {
2516   PetscErrorCode       ierr;
2517 
2518   PetscFunctionBegin;
2519   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 #undef __FUNCT__
2524 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2525 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2526 {
2527   PetscErrorCode       ierr;
2528   PetscBool            sc = PETSC_FALSE,flg;
2529 
2530   PetscFunctionBegin;
2531   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2532   ierr = PetscObjectOptionsBegin((PetscObject)A);
2533     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2534     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2535     if (flg) {
2536       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2537     }
2538   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatShift_MPIAIJ"
2544 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2545 {
2546   PetscErrorCode ierr;
2547   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2548   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2549 
2550   PetscFunctionBegin;
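  /* make sure the diagonal block can hold the shifted diagonal before calling MatShift_Basic():
     preallocate one entry per row if the matrix has not been preallocated at all, and if the
     diagonal block exists but is empty, re-preallocate it while preserving the user's
     new-nonzero policy (aij->nonew) */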
2551   if (!Y->preallocated) {
2552     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2553   } else if (!aij->nz) {
2554     PetscInt nonew = aij->nonew;
2555     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2556     aij->nonew = nonew;
2557   }
2558   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2564 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2565 {
2566   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2571   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2572   if (d) {
2573     PetscInt rstart;
2574     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart;
2577   }
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584                                        MatGetRow_MPIAIJ,
2585                                        MatRestoreRow_MPIAIJ,
2586                                        MatMult_MPIAIJ,
2587                                 /* 4*/ MatMultAdd_MPIAIJ,
2588                                        MatMultTranspose_MPIAIJ,
2589                                        MatMultTransposeAdd_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                 /*10*/ 0,
2594                                        0,
2595                                        0,
2596                                        MatSOR_MPIAIJ,
2597                                        MatTranspose_MPIAIJ,
2598                                 /*15*/ MatGetInfo_MPIAIJ,
2599                                        MatEqual_MPIAIJ,
2600                                        MatGetDiagonal_MPIAIJ,
2601                                        MatDiagonalScale_MPIAIJ,
2602                                        MatNorm_MPIAIJ,
2603                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2604                                        MatAssemblyEnd_MPIAIJ,
2605                                        MatSetOption_MPIAIJ,
2606                                        MatZeroEntries_MPIAIJ,
2607                                 /*24*/ MatZeroRows_MPIAIJ,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                 /*29*/ MatSetUp_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        MatGetDiagonalBlock_MPIAIJ,
2616                                        0,
2617                                 /*34*/ MatDuplicate_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                 /*39*/ MatAXPY_MPIAIJ,
2623                                        MatGetSubMatrices_MPIAIJ,
2624                                        MatIncreaseOverlap_MPIAIJ,
2625                                        MatGetValues_MPIAIJ,
2626                                        MatCopy_MPIAIJ,
2627                                 /*44*/ MatGetRowMax_MPIAIJ,
2628                                        MatScale_MPIAIJ,
2629                                        MatShift_MPIAIJ,
2630                                        MatDiagonalSet_MPIAIJ,
2631                                        MatZeroRowsColumns_MPIAIJ,
2632                                 /*49*/ MatSetRandom_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2638                                        0,
2639                                        MatSetUnfactored_MPIAIJ,
2640                                        MatPermute_MPIAIJ,
2641                                        0,
2642                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2643                                        MatDestroy_MPIAIJ,
2644                                        MatView_MPIAIJ,
2645                                        0,
2646                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2648                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2653                                        MatGetRowMinAbs_MPIAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*75*/ MatFDColoringApply_AIJ,
2659                                        MatSetFromOptions_MPIAIJ,
2660                                        0,
2661                                        0,
2662                                        MatFindZeroDiagonals_MPIAIJ,
2663                                 /*80*/ 0,
2664                                        0,
2665                                        0,
2666                                 /*83*/ MatLoad_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2673                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2674                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2675                                        MatPtAP_MPIAIJ_MPIAIJ,
2676                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2677                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                 /*99*/ 0,
2683                                        0,
2684                                        0,
2685                                        MatConjugate_MPIAIJ,
2686                                        0,
2687                                 /*104*/MatSetValuesRow_MPIAIJ,
2688                                        MatRealPart_MPIAIJ,
2689                                        MatImaginaryPart_MPIAIJ,
2690                                        0,
2691                                        0,
2692                                 /*109*/0,
2693                                        0,
2694                                        MatGetRowMin_MPIAIJ,
2695                                        0,
2696                                        MatMissingDiagonal_MPIAIJ,
2697                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2698                                        0,
2699                                        MatGetGhosts_MPIAIJ,
2700                                        0,
2701                                        0,
2702                                 /*119*/0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        MatGetMultiProcBlock_MPIAIJ,
2707                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2708                                        MatGetColumnNorms_MPIAIJ,
2709                                        MatInvertBlockDiagonal_MPIAIJ,
2710                                        0,
2711                                        MatGetSubMatricesMPI_MPIAIJ,
2712                                 /*129*/0,
2713                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2714                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2715                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2716                                        0,
2717                                 /*134*/0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                 /*139*/MatSetBlockSizes_MPIAIJ,
2723                                        0,
2724                                        0,
2725                                        MatFDColoringSetUp_MPIXAIJ,
2726                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2727                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2728 };
2729 
2730 /* ----------------------------------------------------------------------------------------*/
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2734 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2735 {
2736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2741   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2742   PetscFunctionReturn(0);
2743 }
2744 
2745 #undef __FUNCT__
2746 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2747 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2748 {
2749   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2754   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 #undef __FUNCT__
2759 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2760 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2761 {
2762   Mat_MPIAIJ     *b;
2763   PetscErrorCode ierr;
2764 
2765   PetscFunctionBegin;
2766   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2768   b = (Mat_MPIAIJ*)B->data;
2769 
2770 #if defined(PETSC_USE_CTABLE)
2771   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2772 #else
2773   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2774 #endif
2775   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2776   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2777   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2778 
2779   /* Because the off-diagonal block b->B may have been resized we simply destroy it and create a new one each time */
2780   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2781   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2782   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2783   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2784   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2785   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2786 
2787   if (!B->preallocated) {
2788     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2789     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2790     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2791     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2793   }
2794 
2795   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2796   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2797   B->preallocated  = PETSC_TRUE;
2798   B->was_assembled = PETSC_FALSE;
2799   B->assembled     = PETSC_FALSE;
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2805 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2806 {
2807   Mat            mat;
2808   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   *newmat = 0;
2813   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2814   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2815   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2816   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2817   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2818   a       = (Mat_MPIAIJ*)mat->data;
2819 
2820   mat->factortype   = matin->factortype;
2821   mat->assembled    = PETSC_TRUE;
2822   mat->insertmode   = NOT_SET_VALUES;
2823   mat->preallocated = PETSC_TRUE;
2824 
2825   a->size         = oldmat->size;
2826   a->rank         = oldmat->rank;
2827   a->donotstash   = oldmat->donotstash;
2828   a->roworiented  = oldmat->roworiented;
2829   a->rowindices   = 0;
2830   a->rowvalues    = 0;
2831   a->getrowactive = PETSC_FALSE;
2832 
2833   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2834   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2835 
2836   if (oldmat->colmap) {
2837 #if defined(PETSC_USE_CTABLE)
2838     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2839 #else
2840     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2841     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2843 #endif
2844   } else a->colmap = 0;
2845   if (oldmat->garray) {
2846     PetscInt len;
2847     len  = oldmat->B->cmap->n;
2848     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2849     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2850     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2851   } else a->garray = 0;
2852 
2853   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2854   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2855   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2856   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2857   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2858   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2859   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2860   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2861   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2862   *newmat = mat;
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatLoad_MPIAIJ"
2870 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2871 {
2872   PetscScalar    *vals,*svals;
2873   MPI_Comm       comm;
2874   PetscErrorCode ierr;
2875   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2876   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2877   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2878   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2879   PetscInt       cend,cstart,n,*rowners;
2880   int            fd;
2881   PetscInt       bs = newMat->rmap->bs;
2882 
2883   PetscFunctionBegin;
2884   /* force binary viewer to load .info file if it has not yet done so */
2885   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2886   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2889   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2890   if (!rank) {
2891     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2892     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2893     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2894   }
2895 
2896   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2897   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2898   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2899   if (bs < 0) bs = 1;
2900 
2901   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2902   M    = header[1]; N = header[2];
2903 
2904   /* If global sizes are set, check if they are consistent with that given in the file */
2905   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2906   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2907 
2908   /* determine ownership of all (block) rows */
2909   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2910   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2911   else m = newMat->rmap->n; /* Set by user */
2912 
2913   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2914   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2915 
2916   /* First process needs enough room for process with most rows */
2917   if (!rank) {
2918     mmax = rowners[1];
2919     for (i=2; i<=size; i++) {
2920       mmax = PetscMax(mmax, rowners[i]);
2921     }
2922   } else mmax = -1;             /* unused, but compilers complain */
2923 
2924   rowners[0] = 0;
2925   for (i=2; i<=size; i++) {
2926     rowners[i] += rowners[i-1];
2927   }
2928   rstart = rowners[rank];
2929   rend   = rowners[rank+1];
2930 
2931   /* distribute row lengths to all processors */
2932   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2933   if (!rank) {
2934     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2935     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2936     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2937     for (j=0; j<m; j++) {
2938       procsnz[0] += ourlens[j];
2939     }
2940     for (i=1; i<size; i++) {
2941       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2942       /* calculate the number of nonzeros on each processor */
2943       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2944         procsnz[i] += rowlengths[j];
2945       }
2946       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2947     }
2948     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2949   } else {
2950     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2951   }
2952 
2953   if (!rank) {
2954     /* determine max buffer needed and allocate it */
2955     maxnz = 0;
2956     for (i=0; i<size; i++) {
2957       maxnz = PetscMax(maxnz,procsnz[i]);
2958     }
2959     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2960 
2961     /* read in my part of the matrix column indices  */
2962     nz   = procsnz[0];
2963     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2964     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2965 
2966     /* read in everyone else's part and ship it off */
2967     for (i=1; i<size; i++) {
2968       nz   = procsnz[i];
2969       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2970       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2971     }
2972     ierr = PetscFree(cols);CHKERRQ(ierr);
2973   } else {
2974     /* determine buffer space needed for message */
2975     nz = 0;
2976     for (i=0; i<m; i++) {
2977       nz += ourlens[i];
2978     }
2979     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2980 
2981     /* receive message of column indices*/
2982     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2983   }
2984 
2985   /* determine column ownership if matrix is not square */
2986   if (N != M) {
2987     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2988     else n = newMat->cmap->n;
2989     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2990     cstart = cend - n;
2991   } else {
2992     cstart = rstart;
2993     cend   = rend;
2994     n      = cend - cstart;
2995   }
2996 
2997   /* loop over local rows, determining number of off diagonal entries */
2998   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2999   jj   = 0;
3000   for (i=0; i<m; i++) {
3001     for (j=0; j<ourlens[i]; j++) {
3002       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3003       jj++;
3004     }
3005   }
3006 
3007   for (i=0; i<m; i++) {
3008     ourlens[i] -= offlens[i];
3009   }
3010   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3011 
3012   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3013 
3014   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3015 
3016   for (i=0; i<m; i++) {
3017     ourlens[i] += offlens[i];
3018   }
3019 
3020   if (!rank) {
3021     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3022 
3023     /* read in my part of the matrix numerical values  */
3024     nz   = procsnz[0];
3025     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3026 
3027     /* insert into matrix */
3028     jj      = rstart;
3029     smycols = mycols;
3030     svals   = vals;
3031     for (i=0; i<m; i++) {
3032       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3033       smycols += ourlens[i];
3034       svals   += ourlens[i];
3035       jj++;
3036     }
3037 
3038     /* read in other processors and ship out */
3039     for (i=1; i<size; i++) {
3040       nz   = procsnz[i];
3041       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3042       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3043     }
3044     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3045   } else {
3046     /* receive numeric values */
3047     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3048 
3049     /* receive message of values*/
3050     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3051 
3052     /* insert into matrix */
3053     jj      = rstart;
3054     smycols = mycols;
3055     svals   = vals;
3056     for (i=0; i<m; i++) {
3057       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3058       smycols += ourlens[i];
3059       svals   += ourlens[i];
3060       jj++;
3061     }
3062   }
3063   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3064   ierr = PetscFree(vals);CHKERRQ(ierr);
3065   ierr = PetscFree(mycols);CHKERRQ(ierr);
3066   ierr = PetscFree(rowners);CHKERRQ(ierr);
3067   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3068   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3074 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3075 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3076 {
3077   PetscErrorCode ierr;
3078   IS             iscol_local;
3079   PetscInt       csize;
3080 
3081   PetscFunctionBegin;
3082   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3083   if (call == MAT_REUSE_MATRIX) {
3084     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3085     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3086   } else {
3087     /* check if we are grabbing all columns*/
3088     PetscBool    isstride;
3089     PetscMPIInt  lisstride = 0,gisstride;
3090     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3091     if (isstride) {
3092       PetscInt  start,len,mstart,mlen;
3093       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3094       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3095       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3096       if (mstart == start && mlen-mstart == len) lisstride = 1;
3097     }
3098     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3099     if (gisstride) {
3100       PetscInt N;
3101       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3102       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3103       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3104       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3105     } else {
3106       PetscInt cbs;
3107       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3108       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3109       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3110     }
3111   }
3112   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3113   if (call == MAT_INITIAL_MATRIX) {
3114     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3115     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3116   }
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3121 #undef __FUNCT__
3122 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3123 /*
3124     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3125   and then the end result obtained by concatenating the local matrices.
3126   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3127 
3128   Note: This requires a sequential iscol with all indices.
3129 */
3130 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3131 {
3132   PetscErrorCode ierr;
3133   PetscMPIInt    rank,size;
3134   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3135   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3136   PetscBool      allcolumns, colflag;
3137   Mat            M,Mreuse;
3138   MatScalar      *vwork,*aa;
3139   MPI_Comm       comm;
3140   Mat_SeqAIJ     *aij;
3141 
3142   PetscFunctionBegin;
3143   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3144   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3145   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3146 
3147   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3148   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3149   if (colflag && ncol == mat->cmap->N) {
3150     allcolumns = PETSC_TRUE;
3151     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3152   } else {
3153     allcolumns = PETSC_FALSE;
3154   }
3155   if (call ==  MAT_REUSE_MATRIX) {
3156     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3157     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3158     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3159   } else {
3160     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3161   }
3162 
3163   /*
3164       m - number of local rows
3165       n - number of columns (same on all processors)
3166       rstart - first row in new global matrix generated
3167   */
3168   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3169   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3170   if (call == MAT_INITIAL_MATRIX) {
3171     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3172     ii  = aij->i;
3173     jj  = aij->j;
3174 
3175     /*
3176         Determine the number of non-zeros in the diagonal and off-diagonal
3177         portions of the matrix in order to do correct preallocation
3178     */
3179 
3180     /* first get start and end of "diagonal" columns */
3181     if (csize == PETSC_DECIDE) {
3182       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3183       if (mglobal == n) { /* square matrix */
3184         nlocal = m;
3185       } else {
3186         nlocal = n/size + ((n % size) > rank);
3187       }
3188     } else {
3189       nlocal = csize;
3190     }
3191     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3192     rstart = rend - nlocal;
3193     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3194 
3195     /* next, compute all the lengths */
3196     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3197     olens = dlens + m;
3198     for (i=0; i<m; i++) {
3199       jend = ii[i+1] - ii[i];
3200       olen = 0;
3201       dlen = 0;
3202       for (j=0; j<jend; j++) {
3203         if (*jj < rstart || *jj >= rend) olen++;
3204         else dlen++;
3205         jj++;
3206       }
3207       olens[i] = olen;
3208       dlens[i] = dlen;
3209     }
3210     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3211     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3212     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3213     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3214     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3215     ierr = PetscFree(dlens);CHKERRQ(ierr);
3216   } else {
3217     PetscInt ml,nl;
3218 
3219     M    = *newmat;
3220     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3221     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3222     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3223     /*
3224          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3225        rather than the slower MatSetValues().
3226     */
3227     M->was_assembled = PETSC_TRUE;
3228     M->assembled     = PETSC_FALSE;
3229   }
3230   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3231   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3232   ii   = aij->i;
3233   jj   = aij->j;
3234   aa   = aij->a;
3235   for (i=0; i<m; i++) {
3236     row   = rstart + i;
3237     nz    = ii[i+1] - ii[i];
3238     cwork = jj;     jj += nz;
3239     vwork = aa;     aa += nz;
3240     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3241   }
3242 
3243   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3244   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3245   *newmat = M;
3246 
3247   /* save submatrix used in processor for next request */
3248   if (call ==  MAT_INITIAL_MATRIX) {
3249     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3250     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3251   }
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3257 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3258 {
3259   PetscInt       m,cstart, cend,j,nnz,i,d;
3260   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3261   const PetscInt *JJ;
3262   PetscScalar    *values;
3263   PetscErrorCode ierr;
3264 
3265   PetscFunctionBegin;
3266   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3267 
3268   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3269   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3270   m      = B->rmap->n;
3271   cstart = B->cmap->rstart;
3272   cend   = B->cmap->rend;
3273   rstart = B->rmap->rstart;
3274 
3275   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3276 
3277 #if defined(PETSC_USE_DEBUG)
3278   for (i=0; i<m; i++) {
3279     nnz = Ii[i+1]- Ii[i];
3280     JJ  = J + Ii[i];
3281     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3282     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3283     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3284   }
3285 #endif
3286 
3287   for (i=0; i<m; i++) {
3288     nnz     = Ii[i+1]- Ii[i];
3289     JJ      = J + Ii[i];
3290     nnz_max = PetscMax(nnz_max,nnz);
3291     d       = 0;
3292     for (j=0; j<nnz; j++) {
3293       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3294     }
3295     d_nnz[i] = d;
3296     o_nnz[i] = nnz - d;
3297   }
3298   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3299   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3300 
3301   if (v) values = (PetscScalar*)v;
3302   else {
3303     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3304   }
3305 
3306   for (i=0; i<m; i++) {
3307     ii   = i + rstart;
3308     nnz  = Ii[i+1]- Ii[i];
3309     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3310   }
3311   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3313 
3314   if (!v) {
3315     ierr = PetscFree(values);CHKERRQ(ierr);
3316   }
3317   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3318   PetscFunctionReturn(0);
3319 }
3320 
3321 #undef __FUNCT__
3322 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3323 /*@
3324    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3325    (the default parallel PETSc format).
3326 
3327    Collective on MPI_Comm
3328 
3329    Input Parameters:
3330 +  B - the matrix
3331 .  i - the indices into j for the start of each local row (starts with zero)
3332 .  j - the column indices for each local row (starts with zero)
3333 -  v - optional values in the matrix
3334 
3335    Level: developer
3336 
3337    Notes:
3338       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3339      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3340      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3341 
3342       The i and j indices are 0 based, and the entries of i are offsets into the local j (and v) arrays.
3343 
3344       The format used for the sparse matrix input is equivalent to a
3345     row-major ordering, i.e. for the following matrix, the expected input data is
3346     as shown
3347 
3348 $        1 0 0
3349 $        2 0 3     P0
3350 $       -------
3351 $        4 5 6     P1
3352 $
3353 $     Process0 [P0]: rows_owned=[0,1]
3354 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3355 $        j =  {0,0,2}  [size = 3]
3356 $        v =  {1,2,3}  [size = 3]
3357 $
3358 $     Process1 [P1]: rows_owned=[2]
3359 $        i =  {0,3}    [size = nrow+1  = 1+1]
3360 $        j =  {0,1,2}  [size = 3]
3361 $        v =  {4,5,6}  [size = 3]
3362 
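   As a sketch, process 0 above could pass its local CSR data as follows (B is the
   matrix being preallocated; error handling omitted):
$        PetscInt    i[] = {0,1,3};
$        PetscInt    j[] = {0,0,2};
$        PetscScalar v[] = {1,2,3};
$        MatMPIAIJSetPreallocationCSR(B,i,j,v);
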
3363 .keywords: matrix, aij, compressed row, sparse, parallel
3364 
3365 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3366           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3367 @*/
3368 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3369 {
3370   PetscErrorCode ierr;
3371 
3372   PetscFunctionBegin;
3373   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3374   PetscFunctionReturn(0);
3375 }
3376 
3377 #undef __FUNCT__
3378 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3379 /*@C
3380    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3381    (the default parallel PETSc format).  For good matrix assembly performance
3382    the user should preallocate the matrix storage by setting the parameters
3383    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3384    performance can be increased by more than a factor of 50.
3385 
3386    Collective on MPI_Comm
3387 
3388    Input Parameters:
3389 +  B - the matrix
3390 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3391            (same value is used for all local rows)
3392 .  d_nnz - array containing the number of nonzeros in the various rows of the
3393            DIAGONAL portion of the local submatrix (possibly different for each row)
3394            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3395            The size of this array is equal to the number of local rows, i.e 'm'.
3396            For matrices that will be factored, you must leave room for (and set)
3397            the diagonal entry even if it is zero.
3398 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3399            submatrix (same value is used for all local rows).
3400 -  o_nnz - array containing the number of nonzeros in the various rows of the
3401            OFF-DIAGONAL portion of the local submatrix (possibly different for
3402            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3403            structure. The size of this array is equal to the number
3404            of local rows, i.e 'm'.
3405 
3406    If the *_nnz parameter is given then the *_nz parameter is ignored
3407 
3408    The AIJ format (also called the Yale sparse matrix format or
3409    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3410    storage.  The stored row and column indices begin with zero.
3411    See Users-Manual: ch_mat for details.
3412 
3413    The parallel matrix is partitioned such that the first m0 rows belong to
3414    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3415    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3416 
3417    The DIAGONAL portion of the local submatrix of a processor can be defined
3418    as the submatrix which is obtained by extracting the part corresponding to
3419    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3420    first row that belongs to the processor, r2 is the last row belonging to
3421    this processor, and c1-c2 is the range of indices of the local part of a
3422    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3423    common case of a square matrix, the row and column ranges are the same and
3424    the DIAGONAL part is also square. The remaining portion of the local
3425    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3426 
3427    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3428 
3429    You can call MatGetInfo() to get information on how effective the preallocation was;
3430    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
3431    You can also run with the option -info and look for messages with the string
3432    malloc in them to see if additional memory allocation was needed.
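
   For example, after assembly one could inspect some of these fields with (a sketch;
   error handling omitted):
.vb
     MatInfo info;
     MatGetInfo(B,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_unneeded %g\n",info.mallocs,info.nz_unneeded);
.ve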
3433 
3434    Example usage:
3435 
3436    Consider the following 8x8 matrix with 34 non-zero values that is
3437    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3438    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3439    as follows:
3440 
3441 .vb
3442             1  2  0  |  0  3  0  |  0  4
3443     Proc0   0  5  6  |  7  0  0  |  8  0
3444             9  0 10  | 11  0  0  | 12  0
3445     -------------------------------------
3446            13  0 14  | 15 16 17  |  0  0
3447     Proc1   0 18  0  | 19 20 21  |  0  0
3448             0  0  0  | 22 23  0  | 24  0
3449     -------------------------------------
3450     Proc2  25 26 27  |  0  0 28  | 29  0
3451            30  0  0  | 31 32 33  |  0 34
3452 .ve
3453 
3454    This can be represented as a collection of submatrices as:
3455 
3456 .vb
3457       A B C
3458       D E F
3459       G H I
3460 .ve
3461 
3462    Where the submatrices A,B,C are owned by proc0, D,E,F are
3463    owned by proc1, G,H,I are owned by proc2.
3464 
3465    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3466    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3467    The 'M','N' parameters are 8,8, and have the same values on all procs.
3468 
3469    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3470    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3471    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3472    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3473    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3474    matrix, and [DF] as another SeqAIJ matrix.
3475 
3476    When d_nz, o_nz parameters are specified, d_nz storage elements are
3477    allocated for every row of the local diagonal submatrix, and o_nz
3478    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3479    One way to choose d_nz and o_nz is to use the max nonzeros per local
3480    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3481    In this case, the values of d_nz,o_nz are:
3482 .vb
3483      proc0 : dnz = 2, o_nz = 2
3484      proc1 : dnz = 3, o_nz = 2
3485      proc2 : dnz = 1, o_nz = 4
3486 .ve
3487    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3488    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3489    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3490    34 values.
3491 
3492    When d_nnz, o_nnz parameters are specified, the storage is specified
3493    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3494    In the above case the values for d_nnz,o_nnz are:
3495 .vb
3496      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3497      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3498      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3499 .ve
3500    Here the space allocated is the sum of all the above values, i.e. 34, and
3501    hence the preallocation is perfect.
3502 
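   As a sketch, proc1 above could preallocate its three local rows with (error
   handling omitted):
.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
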
3503    Level: intermediate
3504 
3505 .keywords: matrix, aij, compressed row, sparse, parallel
3506 
3507 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3508           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3509 @*/
3510 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3511 {
3512   PetscErrorCode ierr;
3513 
3514   PetscFunctionBegin;
3515   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3516   PetscValidType(B,1);
3517   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3518   PetscFunctionReturn(0);
3519 }
3520 
3521 #undef __FUNCT__
3522 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3523 /*@
3524      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3525          CSR format the local rows.
3526 
3527    Collective on MPI_Comm
3528 
3529    Input Parameters:
3530 +  comm - MPI communicator
3531 .  m - number of local rows (Cannot be PETSC_DECIDE)
3532 .  n - This value should be the same as the local size used in creating the
3533        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3534        calculated if N is given) For square matrices n is almost always m.
3535 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3536 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3537 .   i - row indices
3538 .   j - column indices
3539 -   a - matrix values
3540 
3541    Output Parameter:
3542 .   mat - the matrix
3543 
3544    Level: intermediate
3545 
3546    Notes:
3547        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3548      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3549      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3550 
3551       The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
3552 
3553       The format used for the sparse matrix input is equivalent to a
3554     row-major ordering, i.e. for the following matrix, the expected input data is
3555     as shown
3556 
3557 $        1 0 0
3558 $        2 0 3     P0
3559 $       -------
3560 $        4 5 6     P1
3561 $
3562 $     Process0 [P0]: rows_owned=[0,1]
3563 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3564 $        j =  {0,0,2}  [size = 3]
3565 $        v =  {1,2,3}  [size = 3]
3566 $
3567 $     Process1 [P1]: rows_owned=[2]
3568 $        i =  {0,3}    [size = nrow+1  = 1+1]
3569 $        j =  {0,1,2}  [size = 3]
3570 $        v =  {4,5,6}  [size = 3]
3571 
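   As a sketch, the corresponding collective call, shown with process 0's arrays from
   the example above (comm and the output matrix A are illustrative; error handling omitted):
$        PetscInt    i[] = {0,1,3};
$        PetscInt    j[] = {0,0,2};
$        PetscScalar a[] = {1,2,3};
$        MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);
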
3572 .keywords: matrix, aij, compressed row, sparse, parallel
3573 
3574 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3575           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3576 @*/
3577 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3578 {
3579   PetscErrorCode ierr;
3580 
3581   PetscFunctionBegin;
3582   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3583   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3584   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3585   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3586   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3587   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3588   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3589   PetscFunctionReturn(0);
3590 }
3591 
3592 #undef __FUNCT__
3593 #define __FUNCT__ "MatCreateAIJ"
3594 /*@C
3595    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3596    (the default parallel PETSc format).  For good matrix assembly performance
3597    the user should preallocate the matrix storage by setting the parameters
3598    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3599    performance can be increased by more than a factor of 50.
3600 
3601    Collective on MPI_Comm
3602 
3603    Input Parameters:
3604 +  comm - MPI communicator
3605 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3606            This value should be the same as the local size used in creating the
3607            y vector for the matrix-vector product y = Ax.
3608 .  n - This value should be the same as the local size used in creating the
3609        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3610        calculated if N is given) For square matrices n is almost always m.
3611 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3612 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3613 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3614            (same value is used for all local rows)
3615 .  d_nnz - array containing the number of nonzeros in the various rows of the
3616            DIAGONAL portion of the local submatrix (possibly different for each row)
3617            or NULL, if d_nz is used to specify the nonzero structure.
3618            The size of this array is equal to the number of local rows, i.e 'm'.
3619 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3620            submatrix (same value is used for all local rows).
3621 -  o_nnz - array containing the number of nonzeros in the various rows of the
3622            OFF-DIAGONAL portion of the local submatrix (possibly different for
3623            each row) or NULL, if o_nz is used to specify the nonzero
3624            structure. The size of this array is equal to the number
3625            of local rows, i.e 'm'.
3626 
3627    Output Parameter:
3628 .  A - the matrix
3629 
3630    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3631    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3632    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3633 
3634    Notes:
3635    If the *_nnz parameter is given then the *_nz parameter is ignored
3636 
3637    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3638    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3639    storage requirements for this matrix.
3640 
3641    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3642    processor then it must be used on all processors that share the object for
3643    that argument.
3644 
3645    The user MUST specify either the local or global matrix dimensions
3646    (possibly both).
3647 
3648    The parallel matrix is partitioned across processors such that the
3649    first m0 rows belong to process 0, the next m1 rows belong to
3650    process 1, the next m2 rows belong to process 2 etc.. where
3651    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3652    values corresponding to [m x N] submatrix.
3653 
3654    The columns are logically partitioned with the n0 columns belonging
3655    to 0th partition, the next n1 columns belonging to the next
3656    partition etc., where n0,n1,n2,... are the input parameter 'n'.
3657 
3658    The DIAGONAL portion of the local submatrix on any given processor
3659    is the submatrix corresponding to the rows and columns m,n
3660    owned by the given processor, i.e. the diagonal matrix on
3661    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3662    etc. The remaining portion of the local submatrix [m x (N-n)]
3663    constitute the OFF-DIAGONAL portion. The example below better
3664    illustrates this concept.
3665 
3666    For a square global matrix we define each processor's diagonal portion
3667    to be its local rows and the corresponding columns (a square submatrix);
3668    each processor's off-diagonal portion encompasses the remainder of the
3669    local matrix (a rectangular submatrix).
3670 
3671    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3672 
3673    When calling this routine with a single process communicator, a matrix of
3674    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3675    type of communicator, use the construction mechanism:
3676      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3677 
3678    By default, this format uses inodes (identical nodes) when possible.
3679    We search for consecutive rows with the same nonzero structure, thereby
3680    reusing matrix information to achieve increased efficiency.
3681 
3682    Options Database Keys:
3683 +  -mat_no_inode  - Do not use inodes
3684 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3685 -  -mat_aij_oneindex - Internally use indexing starting at 1
3686         rather than 0.  Note that when calling MatSetValues(),
3687         the user still MUST index entries starting at 0!
3688 
3689 
3690    Example usage:
3691 
3692    Consider the following 8x8 matrix with 34 non-zero values that is
3693    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3694    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3695    as follows:
3696 
3697 .vb
3698             1  2  0  |  0  3  0  |  0  4
3699     Proc0   0  5  6  |  7  0  0  |  8  0
3700             9  0 10  | 11  0  0  | 12  0
3701     -------------------------------------
3702            13  0 14  | 15 16 17  |  0  0
3703     Proc1   0 18  0  | 19 20 21  |  0  0
3704             0  0  0  | 22 23  0  | 24  0
3705     -------------------------------------
3706     Proc2  25 26 27  |  0  0 28  | 29  0
3707            30  0  0  | 31 32 33  |  0 34
3708 .ve
3709 
3710    This can be represented as a collection of submatrices as:
3711 
3712 .vb
3713       A B C
3714       D E F
3715       G H I
3716 .ve
3717 
3718    Where the submatrices A,B,C are owned by proc0, D,E,F are
3719    owned by proc1, G,H,I are owned by proc2.
3720 
3721    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3722    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3723    The 'M','N' parameters are 8,8, and have the same values on all procs.
3724 
3725    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3726    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3727    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3728    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3729    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3730    matrix, and [DF] as another SeqAIJ matrix.
3731 
3732    When d_nz, o_nz parameters are specified, d_nz storage elements are
3733    allocated for every row of the local diagonal submatrix, and o_nz
3734    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3735    One way to choose d_nz and o_nz is to use the max nonzeros per local
3736    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3737    In this case, the values of d_nz,o_nz are:
3738 .vb
3739      proc0 : dnz = 2, o_nz = 2
3740      proc1 : dnz = 3, o_nz = 2
3741      proc2 : dnz = 1, o_nz = 4
3742 .ve
3743    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3744    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3745    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3746    34 values.
3747 
3748    When d_nnz, o_nnz parameters are specified, the storage is specified
3749    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3750    In the above case the values for d_nnz,o_nnz are:
3751 .vb
3752      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3753      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3754      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3755 .ve
3756    Here the space allocated is the sum of all the above values, i.e. 34, and
3757    hence the preallocation is perfect.
3758 
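   As a sketch, proc1 above could create its share of the matrix with (comm and A are
   illustrative; error handling omitted):
.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
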
3759    Level: intermediate
3760 
3761 .keywords: matrix, aij, compressed row, sparse, parallel
3762 
3763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3764           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3765 @*/
3766 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3767 {
3768   PetscErrorCode ierr;
3769   PetscMPIInt    size;
3770 
3771   PetscFunctionBegin;
3772   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3773   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3774   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3775   if (size > 1) {
3776     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3777     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3778   } else {
3779     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3780     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3781   }
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3787 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3788 {
3789   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3790   PetscBool      flg;
3791   PetscErrorCode ierr;
3792 
3793   PetscFunctionBegin;
3794   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3795   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3796   if (Ad)     *Ad     = a->A;
3797   if (Ao)     *Ao     = a->B;
3798   if (colmap) *colmap = a->garray;
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 #undef __FUNCT__
3803 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3804 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3805 {
3806   PetscErrorCode ierr;
3807   PetscInt       m,N,i,rstart,nnz,Ii;
3808   PetscInt       *indx;
3809   PetscScalar    *values;
3810 
3811   PetscFunctionBegin;
3812   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3813   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3814     PetscInt       *dnz,*onz,sum,bs,cbs;
3815 
3816     if (n == PETSC_DECIDE) {
3817       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3818     }
3819     /* Check sum(n) = N */
3820     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3821     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3822 
3823     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3824     rstart -= m;
3825 
3826     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3827     for (i=0; i<m; i++) {
3828       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3829       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3830       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3831     }
3832 
3833     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3834     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3835     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3836     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3837     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3838     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3839     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3840   }
3841 
3842   /* numeric phase */
3843   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3844   for (i=0; i<m; i++) {
3845     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3846     Ii   = i + rstart;
3847     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3848     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3849   }
3850   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3851   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3852   PetscFunctionReturn(0);
3853 }
3854 
3855 #undef __FUNCT__
3856 #define __FUNCT__ "MatFileSplit"
3857 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3858 {
3859   PetscErrorCode    ierr;
3860   PetscMPIInt       rank;
3861   PetscInt          m,N,i,rstart,nnz;
3862   size_t            len;
3863   const PetscInt    *indx;
3864   PetscViewer       out;
3865   char              *name;
3866   Mat               B;
3867   const PetscScalar *values;
3868 
3869   PetscFunctionBegin;
3870   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3871   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3872   /* Should this be the type of the diagonal block of A? */
3873   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3874   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3875   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3876   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3877   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3878   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3879   for (i=0; i<m; i++) {
3880     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3881     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3882     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3883   }
3884   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886 
3887   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3888   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3889   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3890   sprintf(name,"%s.%d",outfile,rank);
3891   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3892   ierr = PetscFree(name);CHKERRQ(ierr);
3893   ierr = MatView(B,out);CHKERRQ(ierr);
3894   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3895   ierr = MatDestroy(&B);CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3902 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3903 {
3904   PetscErrorCode      ierr;
3905   Mat_Merge_SeqsToMPI *merge;
3906   PetscContainer      container;
3907 
3908   PetscFunctionBegin;
3909   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3910   if (container) {
3911     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3912     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3913     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3914     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3915     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3916     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3917     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3918     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3919     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3920     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3921     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3922     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3923     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3924     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3925     ierr = PetscFree(merge);CHKERRQ(ierr);
3926     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3927   }
3928   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3929   PetscFunctionReturn(0);
3930 }
3931 
3932 #include <../src/mat/utils/freespace.h>
3933 #include <petscbt.h>
3934 
3935 #undef __FUNCT__
3936 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3937 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3938 {
3939   PetscErrorCode      ierr;
3940   MPI_Comm            comm;
3941   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3942   PetscMPIInt         size,rank,taga,*len_s;
3943   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3944   PetscInt            proc,m;
3945   PetscInt            **buf_ri,**buf_rj;
3946   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3947   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3948   MPI_Request         *s_waits,*r_waits;
3949   MPI_Status          *status;
3950   MatScalar           *aa=a->a;
3951   MatScalar           **abuf_r,*ba_i;
3952   Mat_Merge_SeqsToMPI *merge;
3953   PetscContainer      container;
3954 
3955   PetscFunctionBegin;
3956   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3957   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3958 
3959   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3960   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3961 
3962   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3963   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3964 
3965   bi     = merge->bi;
3966   bj     = merge->bj;
3967   buf_ri = merge->buf_ri;
3968   buf_rj = merge->buf_rj;
3969 
3970   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3971   owners = merge->rowmap->range;
3972   len_s  = merge->len_s;
3973 
3974   /* send and recv matrix values */
3975   /*-----------------------------*/
3976   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3977   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3978 
3979   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3980   for (proc=0,k=0; proc<size; proc++) {
3981     if (!len_s[proc]) continue;
3982     i    = owners[proc];
3983     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3984     k++;
3985   }
3986 
3987   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3988   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3989   ierr = PetscFree(status);CHKERRQ(ierr);
3990 
3991   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3992   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3993 
3994   /* insert mat values of mpimat */
3995   /*----------------------------*/
3996   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3997   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3998 
3999   for (k=0; k<merge->nrecv; k++) {
4000     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4001     nrows       = *(buf_ri_k[k]);
4002     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4003     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure of the k-th received message */
4004   }
4005 
4006   /* set values of ba */
4007   m = merge->rowmap->n;
4008   for (i=0; i<m; i++) {
4009     arow = owners[rank] + i;
4010     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4011     bnzi = bi[i+1] - bi[i];
4012     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4013 
4014     /* add local non-zero vals of this proc's seqmat into ba */
4015     anzi   = ai[arow+1] - ai[arow];
4016     aj     = a->j + ai[arow];
4017     aa     = a->a + ai[arow];
4018     nextaj = 0;
4019     for (j=0; nextaj<anzi; j++) {
4020       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4021         ba_i[j] += aa[nextaj++];
4022       }
4023     }
4024 
4025     /* add received vals into ba */
4026     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4027       /* i-th row */
4028       if (i == *nextrow[k]) {
4029         anzi   = *(nextai[k]+1) - *nextai[k];
4030         aj     = buf_rj[k] + *(nextai[k]);
4031         aa     = abuf_r[k] + *(nextai[k]);
4032         nextaj = 0;
4033         for (j=0; nextaj<anzi; j++) {
4034           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4035             ba_i[j] += aa[nextaj++];
4036           }
4037         }
4038         nextrow[k]++; nextai[k]++;
4039       }
4040     }
4041     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4042   }
4043   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4044   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4045 
4046   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4047   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4048   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4049   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4050   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4051   PetscFunctionReturn(0);
4052 }
4053 
4054 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4055 
4056 #undef __FUNCT__
4057 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4058 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4059 {
4060   PetscErrorCode      ierr;
4061   Mat                 B_mpi;
4062   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4063   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4064   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4065   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4066   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4067   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4068   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4069   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4070   MPI_Status          *status;
4071   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4072   PetscBT             lnkbt;
4073   Mat_Merge_SeqsToMPI *merge;
4074   PetscContainer      container;
4075 
4076   PetscFunctionBegin;
4077   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4078 
4079   /* make sure it is a PETSc comm */
4080   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4081   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4082   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4083 
4084   ierr = PetscNew(&merge);CHKERRQ(ierr);
4085   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4086 
4087   /* determine row ownership */
4088   /*---------------------------------------------------------*/
4089   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4090   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4091   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4092   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4093   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4094   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4095   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4096 
4097   m      = merge->rowmap->n;
4098   owners = merge->rowmap->range;
4099 
4100   /* determine the number of messages to send, their lengths */
4101   /*---------------------------------------------------------*/
4102   len_s = merge->len_s;
4103 
4104   len          = 0; /* length of buf_si[] */
4105   merge->nsend = 0;
4106   for (proc=0; proc<size; proc++) {
4107     len_si[proc] = 0;
4108     if (proc == rank) {
4109       len_s[proc] = 0;
4110     } else {
4111       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4112       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4113     }
4114     if (len_s[proc]) {
4115       merge->nsend++;
4116       nrows = 0;
4117       for (i=owners[proc]; i<owners[proc+1]; i++) {
4118         if (ai[i+1] > ai[i]) nrows++;
4119       }
4120       len_si[proc] = 2*(nrows+1);
4121       len         += len_si[proc];
4122     }
4123   }
4124 
4125   /* determine the number and length of messages to receive for ij-structure */
4126   /*-------------------------------------------------------------------------*/
4127   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4128   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4129 
4130   /* post the Irecv of j-structure */
4131   /*-------------------------------*/
4132   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4133   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4134 
4135   /* post the Isend of j-structure */
4136   /*--------------------------------*/
4137   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4138 
4139   for (proc=0, k=0; proc<size; proc++) {
4140     if (!len_s[proc]) continue;
4141     i    = owners[proc];
4142     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4143     k++;
4144   }
4145 
4146   /* receives and sends of j-structure are complete */
4147   /*------------------------------------------------*/
4148   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4149   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4150 
4151   /* send and recv i-structure */
4152   /*---------------------------*/
4153   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4154   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4155 
4156   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4157   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4158   for (proc=0,k=0; proc<size; proc++) {
4159     if (!len_s[proc]) continue;
4160     /* form outgoing message for i-structure:
4161          buf_si[0]:                 nrows to be sent
4162                [1:nrows]:           row index (local to the destination process)
4163                [nrows+1:2*nrows+1]: i-structure index
4164     */
4165     /*-------------------------------------------*/
4166     nrows       = len_si[proc]/2 - 1;
4167     buf_si_i    = buf_si + nrows+1;
4168     buf_si[0]   = nrows;
4169     buf_si_i[0] = 0;
4170     nrows       = 0;
4171     for (i=owners[proc]; i<owners[proc+1]; i++) {
4172       anzi = ai[i+1] - ai[i];
4173       if (anzi) {
4174         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4175         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4176         nrows++;
4177       }
4178     }
4179     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4180     k++;
4181     buf_si += len_si[proc];
4182   }
4183 
4184   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4185   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4186 
4187   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4188   for (i=0; i<merge->nrecv; i++) {
4189     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4190   }
4191 
4192   ierr = PetscFree(len_si);CHKERRQ(ierr);
4193   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4194   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4195   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4196   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4197   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4198   ierr = PetscFree(status);CHKERRQ(ierr);
4199 
4200   /* compute a local seq matrix in each processor */
4201   /*----------------------------------------------*/
4202   /* allocate bi array and free space for accumulating nonzero column info */
4203   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4204   bi[0] = 0;
4205 
4206   /* create and initialize a linked list */
4207   nlnk = N+1;
4208   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4209 
4210   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4211   len  = ai[owners[rank+1]] - ai[owners[rank]];
4212   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4213 
4214   current_space = free_space;
4215 
4216   /* determine symbolic info for each local row */
4217   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4218 
4219   for (k=0; k<merge->nrecv; k++) {
4220     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4221     nrows       = *buf_ri_k[k];
4222     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4223     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure of the k-th received message */
4224   }
4225 
4226   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4227   len  = 0;
4228   for (i=0; i<m; i++) {
4229     bnzi = 0;
4230     /* add local non-zero cols of this proc's seqmat into lnk */
4231     arow  = owners[rank] + i;
4232     anzi  = ai[arow+1] - ai[arow];
4233     aj    = a->j + ai[arow];
4234     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4235     bnzi += nlnk;
4236     /* add received col data into lnk */
4237     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4238       if (i == *nextrow[k]) { /* i-th row */
4239         anzi  = *(nextai[k]+1) - *nextai[k];
4240         aj    = buf_rj[k] + *nextai[k];
4241         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4242         bnzi += nlnk;
4243         nextrow[k]++; nextai[k]++;
4244       }
4245     }
4246     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4247 
4248     /* if free space is not available, make more free space */
4249     if (current_space->local_remaining<bnzi) {
4250       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4251       nspacedouble++;
4252     }
4253     /* copy data into free space, then initialize lnk */
4254     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4255     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4256 
4257     current_space->array           += bnzi;
4258     current_space->local_used      += bnzi;
4259     current_space->local_remaining -= bnzi;
4260 
4261     bi[i+1] = bi[i] + bnzi;
4262   }
4263 
4264   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4265 
4266   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4267   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4268   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4269 
4270   /* create symbolic parallel matrix B_mpi */
4271   /*---------------------------------------*/
4272   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4273   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4274   if (n==PETSC_DECIDE) {
4275     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4276   } else {
4277     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4278   }
4279   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4280   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4281   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4282   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4283   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4284 
4285   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4286   B_mpi->assembled    = PETSC_FALSE;
4287   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4288   merge->bi           = bi;
4289   merge->bj           = bj;
4290   merge->buf_ri       = buf_ri;
4291   merge->buf_rj       = buf_rj;
4292   merge->coi          = NULL;
4293   merge->coj          = NULL;
4294   merge->owners_co    = NULL;
4295 
4296   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4297 
4298   /* attach the supporting struct to B_mpi for reuse */
4299   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4300   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4301   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4302   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4303   *mpimat = B_mpi;
4304 
4305   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4306   PetscFunctionReturn(0);
4307 }
4308 
4309 #undef __FUNCT__
4310 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4311 /*@C
4312       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4313                  matrices from each processor
4314 
4315     Collective on MPI_Comm
4316 
4317    Input Parameters:
4318 +    comm - the communicator the parallel matrix will live on
4319 .    seqmat - the input sequential matrix on each process
4320 .    m - number of local rows (or PETSC_DECIDE)
4321 .    n - number of local columns (or PETSC_DECIDE)
4322 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4323 
4324    Output Parameter:
4325 .    mpimat - the parallel matrix generated
4326 
4327     Level: advanced
4328 
4329    Notes:
4330      The dimensions of the sequential matrix in each processor MUST be the same.
4331      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4332      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
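
   A minimal calling sketch (seqmat is assumed to be an already assembled MATSEQAIJ
   matrix with identical global dimensions on every process):
.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* if only the numerical values of seqmat change, the symbolic structure can be reused */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve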
4333 @*/
4334 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4335 {
4336   PetscErrorCode ierr;
4337   PetscMPIInt    size;
4338 
4339   PetscFunctionBegin;
4340   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4341   if (size == 1) {
4342     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4343     if (scall == MAT_INITIAL_MATRIX) {
4344       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4345     } else {
4346       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4347     }
4348     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4349     PetscFunctionReturn(0);
4350   }
4351   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4352   if (scall == MAT_INITIAL_MATRIX) {
4353     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4354   }
4355   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4356   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4357   PetscFunctionReturn(0);
4358 }
4359 
4360 #undef __FUNCT__
4361 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4362 /*@
4363      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4364           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4365           with MatGetSize()
4366 
4367     Not Collective
4368 
4369    Input Parameters:
4370 +    A - the matrix
4371 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4372 
4373    Output Parameter:
4374 .    A_loc - the local sequential matrix generated
4375 
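   A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... operate on the local rows of A as a sequential matrix ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh values after A changes */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
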
4376     Level: developer
4377 
4378 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4379 
4380 @*/
4381 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4382 {
4383   PetscErrorCode ierr;
4384   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4385   Mat_SeqAIJ     *mat,*a,*b;
4386   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4387   MatScalar      *aa,*ba,*cam;
4388   PetscScalar    *ca;
4389   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4390   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4391   PetscBool      match;
4392   MPI_Comm       comm;
4393   PetscMPIInt    size;
4394 
4395   PetscFunctionBegin;
4396   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4397   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4398   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4399   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4400   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4401 
4402   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4403   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4404   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4405   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4406   aa = a->a; ba = b->a;
4407   if (scall == MAT_INITIAL_MATRIX) {
4408     if (size == 1) {
4409       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4410       PetscFunctionReturn(0);
4411     }
4412 
4413     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4414     ci[0] = 0;
4415     for (i=0; i<am; i++) {
4416       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4417     }
4418     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4419     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4420     k    = 0;
4421     for (i=0; i<am; i++) {
4422       ncols_o = bi[i+1] - bi[i];
4423       ncols_d = ai[i+1] - ai[i];
4424       /* off-diagonal portion of A */
4425       for (jo=0; jo<ncols_o; jo++) {
4426         col = cmap[*bj];
4427         if (col >= cstart) break;
4428         cj[k]   = col; bj++;
4429         ca[k++] = *ba++;
4430       }
4431       /* diagonal portion of A */
4432       for (j=0; j<ncols_d; j++) {
4433         cj[k]   = cstart + *aj++;
4434         ca[k++] = *aa++;
4435       }
4436       /* off-diagonal portion of A */
4437       for (j=jo; j<ncols_o; j++) {
4438         cj[k]   = cmap[*bj++];
4439         ca[k++] = *ba++;
4440       }
4441     }
4442     /* put together the new matrix */
4443     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4444     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4445     /* Since these are PETSc arrays, change flags to free them as necessary. */
4446     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4447     mat->free_a  = PETSC_TRUE;
4448     mat->free_ij = PETSC_TRUE;
4449     mat->nonew   = 0;
4450   } else if (scall == MAT_REUSE_MATRIX) {
4451     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4452     ci = mat->i; cj = mat->j; cam = mat->a;
4453     for (i=0; i<am; i++) {
4454       /* off-diagonal portion of A */
4455       ncols_o = bi[i+1] - bi[i];
4456       for (jo=0; jo<ncols_o; jo++) {
4457         col = cmap[*bj];
4458         if (col >= cstart) break;
4459         *cam++ = *ba++; bj++;
4460       }
4461       /* diagonal portion of A */
4462       ncols_d = ai[i+1] - ai[i];
4463       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4464       /* off-diagonal portion of A */
4465       for (j=jo; j<ncols_o; j++) {
4466         *cam++ = *ba++; bj++;
4467       }
4468     }
4469   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4470   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4471   PetscFunctionReturn(0);
4472 }
4473 
4474 #undef __FUNCT__
4475 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4476 /*@C
4477      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4478 
4479     Not Collective
4480 
4481    Input Parameters:
4482 +    A - the matrix
4483 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4484 -    row, col - index sets of rows and columns to extract (or NULL)
4485 
4486    Output Parameter:
4487 .    A_loc - the local sequential matrix generated
4488 
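   A minimal usage sketch (NULL is passed for row and col, so all local rows and all
   nonzero columns are taken):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
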
4489     Level: developer
4490 
4491 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4492 
4493 @*/
4494 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4495 {
4496   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4497   PetscErrorCode ierr;
4498   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4499   IS             isrowa,iscola;
4500   Mat            *aloc;
4501   PetscBool      match;
4502 
4503   PetscFunctionBegin;
4504   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4505   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4506   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4507   if (!row) {
4508     start = A->rmap->rstart; end = A->rmap->rend;
4509     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4510   } else {
4511     isrowa = *row;
4512   }
4513   if (!col) {
4514     start = A->cmap->rstart;
4515     cmap  = a->garray;
4516     nzA   = a->A->cmap->n;
4517     nzB   = a->B->cmap->n;
4518     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4519     ncols = 0;
4520     for (i=0; i<nzB; i++) {
4521       if (cmap[i] < start) idx[ncols++] = cmap[i];
4522       else break;
4523     }
4524     imark = i;
4525     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4526     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4527     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4528   } else {
4529     iscola = *col;
4530   }
4531   if (scall != MAT_INITIAL_MATRIX) {
4532     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4533     aloc[0] = *A_loc;
4534   }
4535   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4536   *A_loc = aloc[0];
4537   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4538   if (!row) {
4539     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4540   }
4541   if (!col) {
4542     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4543   }
4544   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4545   PetscFunctionReturn(0);
4546 }
4547 
4548 #undef __FUNCT__
4549 #define __FUNCT__ "MatGetBrowsOfAcols"
4550 /*@C
4551     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4552 
4553     Collective on Mat
4554 
4555    Input Parameters:
4556 +    A,B - the matrices in mpiaij format
4557 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4558 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4559 
4560    Output Parameters:
4561 +    rowb, colb - index sets of rows and columns of B to extract
4562 -    B_seq - the sequential matrix generated
4563 
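   A minimal usage sketch (A and B are assumed to be MATMPIAIJ matrices with compatible
   layouts; the index sets created on the first call are kept and reused):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... later, with an unchanged nonzero pattern ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
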
4564     Level: developer
4565 
4566 @*/
4567 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4568 {
4569   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4570   PetscErrorCode ierr;
4571   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4572   IS             isrowb,iscolb;
4573   Mat            *bseq=NULL;
4574 
4575   PetscFunctionBegin;
4576   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4577     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4578   }
4579   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4580 
4581   if (scall == MAT_INITIAL_MATRIX) {
4582     start = A->cmap->rstart;
4583     cmap  = a->garray;
4584     nzA   = a->A->cmap->n;
4585     nzB   = a->B->cmap->n;
4586     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4587     ncols = 0;
4588     for (i=0; i<nzB; i++) {  /* row < local row index */
4589       if (cmap[i] < start) idx[ncols++] = cmap[i];
4590       else break;
4591     }
4592     imark = i;
4593     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4594     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4595     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4596     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4597   } else {
4598     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4599     isrowb  = *rowb; iscolb = *colb;
4600     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4601     bseq[0] = *B_seq;
4602   }
4603   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4604   *B_seq = bseq[0];
4605   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4606   if (!rowb) {
4607     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4608   } else {
4609     *rowb = isrowb;
4610   }
4611   if (!colb) {
4612     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4613   } else {
4614     *colb = iscolb;
4615   }
4616   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4617   PetscFunctionReturn(0);
4618 }
4619 
4620 #undef __FUNCT__
4621 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4622 /*
4623     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4624     of the OFF-DIAGONAL portion of local A
4625 
4626     Collective on Mat
4627 
4628    Input Parameters:
4629 +    A,B - the matrices in mpiaij format
4630 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4631 
4632    Output Parameters:
4633 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4634 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4635 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4636 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4637 
4638     Level: developer
4639 
4640 */
4641 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4642 {
4643   VecScatter_MPI_General *gen_to,*gen_from;
4644   PetscErrorCode         ierr;
4645   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4646   Mat_SeqAIJ             *b_oth;
4647   VecScatter             ctx =a->Mvctx;
4648   MPI_Comm               comm;
4649   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4650   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4651   PetscInt               *rvalues,*svalues;
4652   MatScalar              *b_otha,*bufa,*bufA;
4653   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4654   MPI_Request            *rwaits = NULL,*swaits = NULL;
4655   MPI_Status             *sstatus,rstatus;
4656   PetscMPIInt            jj,size;
4657   PetscInt               *cols,sbs,rbs;
4658   PetscScalar            *vals;
4659 
4660   PetscFunctionBegin;
4661   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4663 
4664   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4665     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4666   }
4667   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4668   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4669 
4670   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4671   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4672   nrecvs   = gen_from->n;
4673   nsends   = gen_to->n;
4674 
4675   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4676   srow    = gen_to->indices;    /* local row index to be sent */
4677   sstarts = gen_to->starts;
4678   sprocs  = gen_to->procs;
4679   sstatus = gen_to->sstatus;
4680   sbs     = gen_to->bs;
4681   rstarts = gen_from->starts;
4682   rprocs  = gen_from->procs;
4683   rbs     = gen_from->bs;
4684 
4685   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4686   if (scall == MAT_INITIAL_MATRIX) {
4687     /* i-array */
4688     /*---------*/
4689     /*  post receives */
4690     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
4691     for (i=0; i<nrecvs; i++) {
4692       rowlen = rvalues + rstarts[i]*rbs;
4693       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4694       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4695     }
4696 
4697     /* pack the outgoing message */
4698     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4699 
4700     sstartsj[0] = 0;
4701     rstartsj[0] = 0;
4702     len         = 0; /* total length of j or a array to be sent */
4703     k           = 0;
4704     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
4705     for (i=0; i<nsends; i++) {
4706       rowlen = svalues + sstarts[i]*sbs;
4707       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4708       for (j=0; j<nrows; j++) {
4709         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4710         for (l=0; l<sbs; l++) {
4711           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4712 
4713           rowlen[j*sbs+l] = ncols;
4714 
4715           len += ncols;
4716           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4717         }
4718         k++;
4719       }
4720       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4721 
4722       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4723     }
4724     /* recvs and sends of i-array are completed */
4725     i = nrecvs;
4726     while (i--) {
4727       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4728     }
4729     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4730     ierr = PetscFree(svalues);CHKERRQ(ierr);
4731 
4732     /* allocate buffers for sending j and a arrays */
4733     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4734     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4735 
4736     /* create i-array of B_oth */
4737     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4738 
4739     b_othi[0] = 0;
4740     len       = 0; /* total length of j or a array to be received */
4741     k         = 0;
4742     for (i=0; i<nrecvs; i++) {
4743       rowlen = rvalues + rstarts[i]*rbs;
4744       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4745       for (j=0; j<nrows; j++) {
4746         b_othi[k+1] = b_othi[k] + rowlen[j];
4747         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4748         k++;
4749       }
4750       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4751     }
4752     ierr = PetscFree(rvalues);CHKERRQ(ierr);
4753 
4754     /* allocate space for the j and a arrays of B_oth */
4755     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4756     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4757 
4758     /* j-array */
4759     /*---------*/
4760     /*  post receives of j-array */
4761     for (i=0; i<nrecvs; i++) {
4762       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4763       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4764     }
4765 
4766     /* pack the outgoing message j-array */
4767     k = 0;
4768     for (i=0; i<nsends; i++) {
4769       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4770       bufJ  = bufj+sstartsj[i];
4771       for (j=0; j<nrows; j++) {
4772         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4773         for (ll=0; ll<sbs; ll++) {
4774           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4775           for (l=0; l<ncols; l++) {
4776             *bufJ++ = cols[l];
4777           }
4778           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4779         }
4780       }
4781       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4782     }
4783 
4784     /* recvs and sends of j-array are completed */
4785     i = nrecvs;
4786     while (i--) {
4787       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4788     }
4789     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4790   } else if (scall == MAT_REUSE_MATRIX) {
4791     sstartsj = *startsj_s;
4792     rstartsj = *startsj_r;
4793     bufa     = *bufa_ptr;
4794     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4795     b_otha   = b_oth->a;
4796   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4797 
4798   /* a-array */
4799   /*---------*/
4800   /*  post receives of a-array */
4801   for (i=0; i<nrecvs; i++) {
4802     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4803     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4804   }
4805 
4806   /* pack the outgoing message a-array */
4807   k = 0;
4808   for (i=0; i<nsends; i++) {
4809     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4810     bufA  = bufa+sstartsj[i];
4811     for (j=0; j<nrows; j++) {
4812       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4813       for (ll=0; ll<sbs; ll++) {
4814         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4815         for (l=0; l<ncols; l++) {
4816           *bufA++ = vals[l];
4817         }
4818         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4819       }
4820     }
4821     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4822   }
4823   /* recvs and sends of a-array are completed */
4824   i = nrecvs;
4825   while (i--) {
4826     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4827   }
4828   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4829   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4830 
4831   if (scall == MAT_INITIAL_MATRIX) {
4832     /* put together the new matrix */
4833     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4834 
4835     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4836     /* Since these are PETSc arrays, change flags to free them as necessary. */
4837     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4838     b_oth->free_a  = PETSC_TRUE;
4839     b_oth->free_ij = PETSC_TRUE;
4840     b_oth->nonew   = 0;
4841 
4842     ierr = PetscFree(bufj);CHKERRQ(ierr);
4843     if (!startsj_s || !bufa_ptr) {
4844       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4845       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4846     } else {
4847       *startsj_s = sstartsj;
4848       *startsj_r = rstartsj;
4849       *bufa_ptr  = bufa;
4850     }
4851   }
4852   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4853   PetscFunctionReturn(0);
4854 }
4855 
4856 #undef __FUNCT__
4857 #define __FUNCT__ "MatGetCommunicationStructs"
4858 /*@C
4859   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4860 
4861   Not Collective
4862 
4863   Input Parameter:
4864 . A - The matrix in mpiaij format
4865 
4866   Output Parameters:
4867 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4868 . colmap - A map from global column index to local index into lvec
4869 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4870 
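  A minimal usage sketch (A is assumed to be a MATMPIAIJ matrix; the returned objects
  are owned by A and must not be destroyed by the caller):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
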
4871   Level: developer
4872 
4873 @*/
4874 #if defined(PETSC_USE_CTABLE)
4875 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4876 #else
4877 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4878 #endif
4879 {
4880   Mat_MPIAIJ *a;
4881 
4882   PetscFunctionBegin;
4883   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4884   PetscValidPointer(lvec, 2);
4885   PetscValidPointer(colmap, 3);
4886   PetscValidPointer(multScatter, 4);
4887   a = (Mat_MPIAIJ*) A->data;
4888   if (lvec) *lvec = a->lvec;
4889   if (colmap) *colmap = a->colmap;
4890   if (multScatter) *multScatter = a->Mvctx;
4891   PetscFunctionReturn(0);
4892 }
4893 
4894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4897 #if defined(PETSC_HAVE_ELEMENTAL)
4898 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4899 #endif
4900 #if defined(PETSC_HAVE_HYPRE)
4901 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
4902 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
4903 #endif
4904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
4905 
4906 #undef __FUNCT__
4907 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4908 /*
4909     Computes (B'*A')' since computing B*A directly is untenable
4910 
4911                n                       p                          p
4912         (              )       (              )         (                  )
4913       m (      A       )  *  n (       B      )   =   m (         C        )
4914         (              )       (              )         (                  )
4915 
4916 */
4917 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4918 {
4919   PetscErrorCode ierr;
4920   Mat            At,Bt,Ct;
4921 
4922   PetscFunctionBegin;
4923   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4924   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4925   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4926   ierr = MatDestroy(&At);CHKERRQ(ierr);
4927   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4928   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4929   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 #undef __FUNCT__
4934 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4935 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4936 {
4937   PetscErrorCode ierr;
4938   PetscInt       m=A->rmap->n,n=B->cmap->n;
4939   Mat            Cmat;
4940 
4941   PetscFunctionBegin;
4942   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4943   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4944   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4945   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4946   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4947   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4948   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4949   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4950 
4951   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4952 
4953   *C = Cmat;
4954   PetscFunctionReturn(0);
4955 }
4956 
4957 /* ----------------------------------------------------------------*/
4958 #undef __FUNCT__
4959 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4960 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4961 {
4962   PetscErrorCode ierr;
4963 
4964   PetscFunctionBegin;
4965   if (scall == MAT_INITIAL_MATRIX) {
4966     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4967     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4968     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4969   }
4970   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4971   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4972   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4973   PetscFunctionReturn(0);
4974 }
4975 
4976 /*MC
4977    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4978 
4979    Options Database Keys:
4980 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4981 
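  For example, the type can be selected at run time (a minimal sketch; A, M, and N are
  placeholders for the caller's matrix and sizes):
.vb
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* honors -mat_type mpiaij */
.ve
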
4982   Level: beginner
4983 
4984 .seealso: MatCreateAIJ()
4985 M*/
4986 
4987 #undef __FUNCT__
4988 #define __FUNCT__ "MatCreate_MPIAIJ"
4989 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4990 {
4991   Mat_MPIAIJ     *b;
4992   PetscErrorCode ierr;
4993   PetscMPIInt    size;
4994 
4995   PetscFunctionBegin;
4996   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4997 
4998   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4999   B->data       = (void*)b;
5000   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5001   B->assembled  = PETSC_FALSE;
5002   B->insertmode = NOT_SET_VALUES;
5003   b->size       = size;
5004 
5005   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5006 
5007   /* build cache for off array entries formed */
5008   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5009 
5010   b->donotstash  = PETSC_FALSE;
5011   b->colmap      = 0;
5012   b->garray      = 0;
5013   b->roworiented = PETSC_TRUE;
5014 
5015   /* stuff used for matrix vector multiply */
5016   b->lvec  = NULL;
5017   b->Mvctx = NULL;
5018 
5019   /* stuff for MatGetRow() */
5020   b->rowindices   = 0;
5021   b->rowvalues    = 0;
5022   b->getrowactive = PETSC_FALSE;
5023 
5024   /* flexible pointer used in CUSP/CUSPARSE classes */
5025   b->spptr = NULL;
5026 
5027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5037 #if defined(PETSC_HAVE_ELEMENTAL)
5038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5039 #endif
5040 #if defined(PETSC_HAVE_HYPRE)
5041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5042 #endif
5043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5047 #if defined(PETSC_HAVE_HYPRE)
5048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5049 #endif
5050   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5051   PetscFunctionReturn(0);
5052 }
5053 
5054 #undef __FUNCT__
5055 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5056 /*@C
5057      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5058          and "off-diagonal" part of the matrix in CSR format.
5059 
5060    Collective on MPI_Comm
5061 
5062    Input Parameters:
5063 +  comm - MPI communicator
5064 .  m - number of local rows (Cannot be PETSC_DECIDE)
5065 .  n - This value should be the same as the local size used in creating the
5066        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5067        calculated if N is given). For square matrices n is almost always m.
5068 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5069 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5070 .   i - row indices for "diagonal" portion of matrix
5071 .   j - column indices
5072 .   a - matrix values
5073 .   oi - row indices for "off-diagonal" portion of matrix
5074 .   oj - column indices
5075 -   oa - matrix values
5076 
5077    Output Parameter:
5078 .   mat - the matrix
5079 
5080    Level: advanced
5081 
5082    Notes:
5083        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5084        must free the arrays once the matrix has been destroyed and not before.
5085 
5086        The i and j indices are 0 based
5087 
5088        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5089 
5090        This sets local rows and cannot be used to set off-processor values.
5091 
5092        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5093        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5094        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5095        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5096        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5097        communication if it is known that only local entries will be set.
5098 
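       A minimal calling sketch (i,j,a and oi,oj,oa are placeholders for CSR arrays the
       application already owns; they are not copied, so they may be freed only after the
       matrix has been destroyed):
.vb
     Mat mat;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
     /* ... use mat ... */
     ierr = MatDestroy(&mat);CHKERRQ(ierr);
     /* now i,j,a,oi,oj,oa may be freed by the caller */
.ve
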
5099 .keywords: matrix, aij, compressed row, sparse, parallel
5100 
5101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5102           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5103 @*/
5104 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5105 {
5106   PetscErrorCode ierr;
5107   Mat_MPIAIJ     *maij;
5108 
5109   PetscFunctionBegin;
5110   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5111   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5112   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5113   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5114   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5115   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5116   maij = (Mat_MPIAIJ*) (*mat)->data;
5117 
5118   (*mat)->preallocated = PETSC_TRUE;
5119 
5120   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5121   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5122 
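  /* Wrap the user-provided CSR arrays (no copies are made) as the sequential "diagonal" block A and the "off-diagonal" block B */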
5123   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5124   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5125 
5126   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5127   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5128   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5129   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5130 
5131   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5132   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5133   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5134   PetscFunctionReturn(0);
5135 }
5136 
5137 /*
5138     Special version for direct calls from Fortran
5139 */
5140 #include <petsc/private/fortranimpl.h>
5141 
5142 /* Redefine these macros so that they can be used in a function that returns void */
5143 #undef CHKERRQ
5144 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5145 #undef SETERRQ2
5146 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5147 #undef SETERRQ3
5148 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5149 #undef SETERRQ
5150 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5151 
5152 #undef __FUNCT__
5153 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5154 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5155 #define __FUNCT__ "MATSETVALUESMPIAIJ"
5156 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5157 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5158 #define __FUNCT__ "matsetvaluesmpiaij"
5159 #else
5160 #define __FUNCT__ "matsetvaluesmpiaij_"
5161 #endif
5162 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5163 {
5164   Mat            mat  = *mmat;
5165   PetscInt       m    = *mm, n = *mn;
5166   InsertMode     addv = *maddv;
5167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5168   PetscScalar    value;
5169   PetscErrorCode ierr;
5170 
5171   MatCheckPreallocated(mat,1);
5172   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5173 
5174 #if defined(PETSC_USE_DEBUG)
5175   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5176 #endif
5177   {
5178     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5179     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5180     PetscBool roworiented = aij->roworiented;
5181 
5182     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5183     Mat        A                 = aij->A;
5184     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5185     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5186     MatScalar  *aa               = a->a;
5187     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5188     Mat        B                 = aij->B;
5189     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5190     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5191     MatScalar  *ba               = b->a;
5192 
5193     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5194     PetscInt  nonew = a->nonew;
5195     MatScalar *ap1,*ap2;
5196 
5197     PetscFunctionBegin;
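    /* Entries for locally owned rows are inserted directly into the diagonal (A) or off-diagonal (B) block below;
       entries for rows owned by other processes are stashed and communicated during MatAssemblyBegin()/MatAssemblyEnd() */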
5198     for (i=0; i<m; i++) {
5199       if (im[i] < 0) continue;
5200 #if defined(PETSC_USE_DEBUG)
5201       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5202 #endif
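      /* Rows in [rstart,rend) are owned by this process; set up pointers into their rows of the diagonal (A) and off-diagonal (B) CSR structures */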
5203       if (im[i] >= rstart && im[i] < rend) {
5204         row      = im[i] - rstart;
5205         lastcol1 = -1;
5206         rp1      = aj + ai[row];
5207         ap1      = aa + ai[row];
5208         rmax1    = aimax[row];
5209         nrow1    = ailen[row];
5210         low1     = 0;
5211         high1    = nrow1;
5212         lastcol2 = -1;
5213         rp2      = bj + bi[row];
5214         ap2      = ba + bi[row];
5215         rmax2    = bimax[row];
5216         nrow2    = bilen[row];
5217         low2     = 0;
5218         high2    = nrow2;
5219 
5220         for (j=0; j<n; j++) {
5221           if (roworiented) value = v[i*n+j];
5222           else value = v[i+j*m];
5223           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5224           if (in[j] >= cstart && in[j] < cend) {
5225             col = in[j] - cstart;
5226             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5227           } else if (in[j] < 0) continue;
5228 #if defined(PETSC_USE_DEBUG)
5229           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5230 #endif
5231           else {
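            /* Column belongs to the off-diagonal block B: if the matrix was assembled before, translate the global
               column index to B's compressed local numbering through aij->colmap */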
5232             if (mat->was_assembled) {
5233               if (!aij->colmap) {
5234                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5235               }
5236 #if defined(PETSC_USE_CTABLE)
5237               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5238               col--;
5239 #else
5240               col = aij->colmap[in[j]] - 1;
5241 #endif
5242               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5243                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5244                 col  =  in[j];
5245                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5246                 B     = aij->B;
5247                 b     = (Mat_SeqAIJ*)B->data;
5248                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5249                 ba    = b->a;    /* refresh ba before computing ap2: MatDisAssemble_MPIAIJ() replaced B and its arrays */
5250                 rp2   = bj + bi[row];
5251                 ap2   = ba + bi[row];
5252                 rmax2 = bimax[row];
5253                 nrow2 = bilen[row];
5254                 low2  = 0;
5255                 high2 = nrow2;
5256                 bm    = aij->B->rmap->n;
5257               }
5258             } else col = in[j];
5259             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5260           }
5261         }
5262       } else if (!aij->donotstash) {
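        /* Row is owned by another process: stash the values; they are sent to the owning process at assembly time */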
5263         if (roworiented) {
5264           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5265         } else {
5266           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5267         }
5268       }
5269     }
5270   }
5271   PetscFunctionReturnVoid();
5272 }
5273 
5274