xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1b5aa96474b99f5c255b3b83465f0fe5cbbf6ade)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the format also automatically switches over to using inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
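/*
   Illustrative sketch of the recommended usage described above: create the matrix, set the
   type to MATAIJ, and call both preallocation routines so the same code runs unchanged on one
   or many processes. The local sizes and the per-row nonzero estimates (5 diagonal, 2
   off-diagonal) are placeholder values.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       m = 100, n = 100;                                  // placeholder local sizes
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         // used on a one-process communicator
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  // used on a multi-process communicator
     // ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/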
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
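/*
   Illustrative sketch of exercising the -mat_type aijcrl key listed above: let the options
   database choose the type via MatSetFromOptions() and still call both preallocation routines.
   Sizes and nonzero estimates are placeholders, and the executable name in the run line is
   hypothetical.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                        // honors -mat_type aijcrl
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   run with, e.g.,  mpiexec -n 4 ./app -mat_type aijcrl
*/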
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
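/*
   Illustrative sketch of how a caller (such as a preconditioner) might use this routine: gmat
   is a square SeqAIJ matrix held on rank 0, m is the number of rows this process should own,
   and the reuse flag selects between building the parallel matrix and refreshing only its
   numerical values. The names gmat, mloc, and P are placeholders.

     Mat            P;
     PetscErrorCode ierr;
     ierr = MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_INITIAL_MATRIX,&P);CHKERRQ(ierr);
     // ... rank 0 updates the numerical values of gmat (same nonzero pattern) ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_REUSE_MATRIX,&P);CHKERRQ(ierr);
*/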
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the diagonal and off-diagonal nonzero counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the diagonal and off-diagonal nonzero counts */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is scalable at
401 a slightly higher hash table lookup cost; without it the mapping is not scalable (each process
402 stores an integer array of order N) but is fast to access.
403 */
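/*
   Illustrative sketch of how the colmap built below is consulted elsewhere in this file:
   entries are stored shifted by one so that zero can mean "global column not present in the
   off-diagonal block". Here gcol is a placeholder global column index.

     PetscInt lcol;                                  // local column in B, or -1 if not present
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/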
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
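/*
   Illustrative sketch of the search-and-insert pattern the two macros above implement for one
   compressed row, where rp holds the column indices, ap the values, and nrow the current row
   length. The real macros additionally handle reallocation, the nonew and ignorezeroentries
   options, and the bookkeeping of nz and the nonzero state.

     if (col <= lastcol) low = 0; else high = nrow;  // reuse the previous search window
     while (high - low > 5) {                        // coarse binary search
       t = (low + high)/2;
       if (rp[t] > col) high = t; else low = t;
     }
     for (i = low; i < high; i++) {                  // finish with a linear scan
       if (rp[i] > col) break;
       if (rp[i] == col) { if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value; goto done; }
     }
     for (ii = nrow-1; ii >= i; ii--) { rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii]; }  // shift later entries up
     rp[i] = col; ap[i] = value; nrow++;
     done: ;
*/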
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
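/*
   Illustrative sketch of the input this routine appears to expect, judging from the copies
   above: v holds every stored value of the global row, ordered by increasing global column, so
   that the first l entries land in the off-diagonal block B (left of the diagonal block), the
   next block of entries lands in the diagonal block A, and the remainder lands back in B (right
   of the diagonal block). The names row and vals are placeholders.

     PetscScalar vals[] = { ... };                   // one value per stored nonzero of the row
     ierr = MatSetValuesRow(A,row,vals);CHKERRQ(ierr);
*/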
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled, if so we must
742      also disassemble ourselves, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in the matrix then only update the matrix nonzero state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045   /* added into yy until the next line */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: symmetric diagonal block */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* the root process needs as much space as the largest processor needs */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
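    /* Note that process 0 must be able to hold a copy of the entire matrix, so this path is
       intended for relatively small matrices (debugging and plotting output). */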
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Every process has to make this call to draw the matrix since the graphics waits are
1444        synchronized across all processes that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     ierr = MatDestroy(&A);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatView_MPIAIJ"
1459 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1460 {
1461   PetscErrorCode ierr;
1462   PetscBool      iascii,isdraw,issocket,isbinary;
1463 
1464   PetscFunctionBegin;
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1469   if (iascii || isdraw || isbinary || issocket) {
1470     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatSOR_MPIAIJ"
1477 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1478 {
1479   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1480   PetscErrorCode ierr;
1481   Vec            bb1 = 0;
1482   PetscBool      hasop;
1483 
1484   PetscFunctionBegin;
1485   if (flag == SOR_APPLY_UPPER) {
1486     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487     PetscFunctionReturn(0);
1488   }
1489 
1490   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1491     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1492   }
1493 
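  /* For the "local" sweep variants the ghost values of xx are gathered into mat->lvec each
     iteration and the off-process coupling B*lvec is folded into the right-hand side bb1,
     so that only the local diagonal block mat->A is actually swept. */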
1494   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1495     if (flag & SOR_ZERO_INITIAL_GUESS) {
1496       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1497       its--;
1498     }
1499 
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1528     if (flag & SOR_ZERO_INITIAL_GUESS) {
1529       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1530       its--;
1531     }
1532     while (its--) {
1533       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535 
1536       /* update rhs: bb1 = bb - B*x */
1537       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1538       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1539 
1540       /* local sweep */
1541       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1542     }
1543   } else if (flag & SOR_EISENSTAT) {
1544     Vec xx1;
1545 
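    /* Eisenstat: a backward local sweep with zero initial guess produces xx; the right-hand side is
       then modified using the (block) diagonal of the matrix and the off-process coupling B*lvec,
       and a forward local sweep on the modified right-hand side gives a correction xx1 that is
       added to xx. */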
1546     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1548 
1549     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1550     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     if (!mat->diag) {
1552       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1553       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1554     }
1555     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1556     if (hasop) {
1557       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1558     } else {
1559       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1560     }
1561     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1562 
1563     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1564 
1565     /* local sweep */
1566     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1567     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1568     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1569   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1570 
1571   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatPermute_MPIAIJ"
1577 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1578 {
1579   Mat            aA,aB,Aperm;
1580   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1581   PetscScalar    *aa,*ba;
1582   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1583   PetscSF        rowsf,sf;
1584   IS             parcolp = NULL;
1585   PetscBool      done;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1590   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1591   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1592   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1593 
1594   /* Invert row permutation to find out where my rows should go */
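  /* The star forest has one leaf per new local row, rooted at the current owner of the row it
     wants (rwant[i]); reducing the new global positions (rstart+i) onto the roots tells every
     process, for each row it currently owns, the global position that row moves to (rdest). */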
1595   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1596   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1597   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1598   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1599   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1600   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601 
1602   /* Invert column permutation to find out where my columns should go */
1603   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1604   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1605   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1606   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1607   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1608   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1610 
1611   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1612   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1613   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1614 
1615   /* Find out where my gcols should go */
1616   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1617   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1619   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1620   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1624 
1625   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1626   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1628   for (i=0; i<m; i++) {
1629     PetscInt row = rdest[i],rowner;
1630     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1631     for (j=ai[i]; j<ai[i+1]; j++) {
1632       PetscInt cowner,col = cdest[aj[j]];
1633       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1634       if (rowner == cowner) dnnz[i]++;
1635       else onnz[i]++;
1636     }
1637     for (j=bi[i]; j<bi[i+1]; j++) {
1638       PetscInt cowner,col = gcdest[bj[j]];
1639       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1640       if (rowner == cowner) dnnz[i]++;
1641       else onnz[i]++;
1642     }
1643   }
1644   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1645   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1649 
1650   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1651   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) {
1654     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1655     PetscInt j0,rowlen;
1656     rowlen = ai[i+1] - ai[i];
1657     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the length of the scratch arrays), so insert in batches */
1658       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1659       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1660     }
1661     rowlen = bi[i+1] - bi[i];
1662     for (j0=j=0; j<rowlen; j0=j) {
1663       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1664       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666   }
1667   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1671   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1673   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1674   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1675   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1676   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1677   *B = Aperm;
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1683 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1684 {
1685   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1686   Mat            A    = mat->A,B = mat->B;
1687   PetscErrorCode ierr;
1688   PetscReal      isend[5],irecv[5];
1689 
1690   PetscFunctionBegin;
1691   info->block_size = 1.0;
1692   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1693 
1694   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1695   isend[3] = info->memory;  isend[4] = info->mallocs;
1696 
1697   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1698 
1699   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1700   isend[3] += info->memory;  isend[4] += info->mallocs;
1701   if (flag == MAT_LOCAL) {
1702     info->nz_used      = isend[0];
1703     info->nz_allocated = isend[1];
1704     info->nz_unneeded  = isend[2];
1705     info->memory       = isend[3];
1706     info->mallocs      = isend[4];
1707   } else if (flag == MAT_GLOBAL_MAX) {
1708     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1709 
1710     info->nz_used      = irecv[0];
1711     info->nz_allocated = irecv[1];
1712     info->nz_unneeded  = irecv[2];
1713     info->memory       = irecv[3];
1714     info->mallocs      = irecv[4];
1715   } else if (flag == MAT_GLOBAL_SUM) {
1716     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1717 
1718     info->nz_used      = irecv[0];
1719     info->nz_allocated = irecv[1];
1720     info->nz_unneeded  = irecv[2];
1721     info->memory       = irecv[3];
1722     info->mallocs      = irecv[4];
1723   }
1724   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1725   info->fill_ratio_needed = 0;
1726   info->factor_mallocs    = 0;
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatSetOption_MPIAIJ"
1732 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1733 {
1734   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   switch (op) {
1739   case MAT_NEW_NONZERO_LOCATIONS:
1740   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1741   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1742   case MAT_KEEP_NONZERO_PATTERN:
1743   case MAT_NEW_NONZERO_LOCATION_ERR:
1744   case MAT_USE_INODES:
1745   case MAT_IGNORE_ZERO_ENTRIES:
1746     MatCheckPreallocated(A,1);
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_ROW_ORIENTED:
1751     a->roworiented = flg;
1752 
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_NEW_DIAGONALS:
1757     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1758     break;
1759   case MAT_IGNORE_OFF_PROC_ENTRIES:
1760     a->donotstash = flg;
1761     break;
1762   case MAT_SPD:
1763     A->spd_set = PETSC_TRUE;
1764     A->spd     = flg;
1765     if (flg) {
1766       A->symmetric                  = PETSC_TRUE;
1767       A->structurally_symmetric     = PETSC_TRUE;
1768       A->symmetric_set              = PETSC_TRUE;
1769       A->structurally_symmetric_set = PETSC_TRUE;
1770     }
1771     break;
1772   case MAT_SYMMETRIC:
1773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1774     break;
1775   case MAT_STRUCTURALLY_SYMMETRIC:
1776     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1777     break;
1778   case MAT_HERMITIAN:
1779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1780     break;
1781   case MAT_SYMMETRY_ETERNAL:
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   default:
1785     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1786   }
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatGetRow_MPIAIJ"
1792 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1793 {
1794   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1795   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1796   PetscErrorCode ierr;
1797   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1798   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1799   PetscInt       *cmap,*idx_p;
1800 
1801   PetscFunctionBegin;
1802   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1803   mat->getrowactive = PETSC_TRUE;
1804 
1805   if (!mat->rowvalues && (idx || v)) {
1806     /*
1807         allocate enough space to hold information from the longest row.
1808     */
1809     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1810     PetscInt   max = 1,tmp;
1811     for (i=0; i<matin->rmap->n; i++) {
1812       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1813       if (max < tmp) max = tmp;
1814     }
1815     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1816   }
1817 
1818   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1819   lrow = row - rstart;
1820 
1821   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1822   if (!v)   {pvA = 0; pvB = 0;}
1823   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1824   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1825   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1826   nztot = nzA + nzB;
1827 
1828   cmap = mat->garray;
1829   if (v  || idx) {
1830     if (nztot) {
1831       /* Sort by increasing column numbers, assuming A and B already sorted */
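      /* imark is the number of B (off-diagonal) columns that fall below cstart; those entries come
         first, followed by the A (diagonal-block) entries, and then the remaining B entries, giving
         a row sorted by global column number. */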
1832       PetscInt imark = -1;
1833       if (v) {
1834         *v = v_p = mat->rowvalues;
1835         for (i=0; i<nzB; i++) {
1836           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1837           else break;
1838         }
1839         imark = i;
1840         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1841         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1842       }
1843       if (idx) {
1844         *idx = idx_p = mat->rowindices;
1845         if (imark > -1) {
1846           for (i=0; i<imark; i++) {
1847             idx_p[i] = cmap[cworkB[i]];
1848           }
1849         } else {
1850           for (i=0; i<nzB; i++) {
1851             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1852             else break;
1853           }
1854           imark = i;
1855         }
1856         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1857         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1858       }
1859     } else {
1860       if (idx) *idx = 0;
1861       if (v)   *v   = 0;
1862     }
1863   }
1864   *nz  = nztot;
1865   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1866   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1872 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1873 {
1874   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1875 
1876   PetscFunctionBegin;
1877   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1878   aij->getrowactive = PETSC_FALSE;
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatNorm_MPIAIJ"
1884 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1885 {
1886   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1887   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1888   PetscErrorCode ierr;
1889   PetscInt       i,j,cstart = mat->cmap->rstart;
1890   PetscReal      sum = 0.0;
1891   MatScalar      *v;
1892 
1893   PetscFunctionBegin;
1894   if (aij->size == 1) {
1895     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1896   } else {
1897     if (type == NORM_FROBENIUS) {
1898       v = amat->a;
1899       for (i=0; i<amat->nz; i++) {
1900         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1901       }
1902       v = bmat->a;
1903       for (i=0; i<bmat->nz; i++) {
1904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1905       }
1906       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907       *norm = PetscSqrtReal(*norm);
1908     } else if (type == NORM_1) { /* max column norm */
1909       PetscReal *tmp,*tmp2;
1910       PetscInt  *jj,*garray = aij->garray;
1911       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1912       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1913       *norm = 0.0;
1914       v     = amat->a; jj = amat->j;
1915       for (j=0; j<amat->nz; j++) {
1916         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1917       }
1918       v = bmat->a; jj = bmat->j;
1919       for (j=0; j<bmat->nz; j++) {
1920         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1921       }
1922       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1923       for (j=0; j<mat->cmap->N; j++) {
1924         if (tmp2[j] > *norm) *norm = tmp2[j];
1925       }
1926       ierr = PetscFree(tmp);CHKERRQ(ierr);
1927       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1928     } else if (type == NORM_INFINITY) { /* max row norm */
1929       PetscReal ntemp = 0.0;
1930       for (j=0; j<aij->A->rmap->n; j++) {
1931         v   = amat->a + amat->i[j];
1932         sum = 0.0;
1933         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1934           sum += PetscAbsScalar(*v); v++;
1935         }
1936         v = bmat->a + bmat->i[j];
1937         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1938           sum += PetscAbsScalar(*v); v++;
1939         }
1940         if (sum > ntemp) ntemp = sum;
1941       }
1942       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1943     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatTranspose_MPIAIJ"
1950 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1951 {
1952   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1953   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1954   PetscErrorCode ierr;
1955   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1956   PetscInt       cstart = A->cmap->rstart,ncol;
1957   Mat            B;
1958   MatScalar      *array;
1959 
1960   PetscFunctionBegin;
1961   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1962 
1963   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1964   ai = Aloc->i; aj = Aloc->j;
1965   bi = Bloc->i; bj = Bloc->j;
1966   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1967     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1968     PetscSFNode          *oloc;
1969     PETSC_UNUSED PetscSF sf;
1970 
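    /* Every nonzero in local column c of the diagonal block becomes a nonzero in local row c of the
       transpose, so counting occurrences of each local column gives d_nnz; occurrences of each ghost
       column (g_nnz) are summed onto the process owning that column with a star forest, yielding the
       off-diagonal counts o_nnz. */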
1971     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1972     /* compute d_nnz for preallocation */
1973     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1974     for (i=0; i<ai[ma]; i++) {
1975       d_nnz[aj[i]]++;
1976       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1977     }
1978     /* compute local off-diagonal contributions */
1979     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1980     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1981     /* map those to global */
1982     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1983     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1984     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1985     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1986     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1987     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1988     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1989 
1990     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1991     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1992     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1993     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1994     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1995     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1996   } else {
1997     B    = *matout;
1998     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1999     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2000   }
2001 
2002   /* copy over the A part */
2003   array = Aloc->a;
2004   row   = A->rmap->rstart;
2005   for (i=0; i<ma; i++) {
2006     ncol = ai[i+1]-ai[i];
2007     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2008     row++;
2009     array += ncol; aj += ncol;
2010   }
2011   aj = Aloc->j;
2012   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2013 
2014   /* copy over the B part */
2015   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2016   array = Bloc->a;
2017   row   = A->rmap->rstart;
2018   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2019   cols_tmp = cols;
2020   for (i=0; i<mb; i++) {
2021     ncol = bi[i+1]-bi[i];
2022     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2023     row++;
2024     array += ncol; cols_tmp += ncol;
2025   }
2026   ierr = PetscFree(cols);CHKERRQ(ierr);
2027 
2028   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2030   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2031     *matout = B;
2032   } else {
2033     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2034   }
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2040 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2041 {
2042   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2043   Mat            a    = aij->A,b = aij->B;
2044   PetscErrorCode ierr;
2045   PetscInt       s1,s2,s3;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2049   if (rr) {
2050     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2051     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2052     /* Overlap communication with computation. */
2053     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2054   }
2055   if (ll) {
2056     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2057     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2058     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2059   }
2060   /* scale  the diagonal block */
2061   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2062 
2063   if (rr) {
2064     /* Do a scatter end and then right scale the off-diagonal block */
2065     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2066     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2073 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2074 {
2075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2076   PetscErrorCode ierr;
2077 
2078   PetscFunctionBegin;
2079   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatEqual_MPIAIJ"
2085 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2086 {
2087   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2088   Mat            a,b,c,d;
2089   PetscBool      flg;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   a = matA->A; b = matA->B;
2094   c = matB->A; d = matB->B;
2095 
2096   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2097   if (flg) {
2098     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2099   }
2100   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatCopy_MPIAIJ"
2106 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2107 {
2108   PetscErrorCode ierr;
2109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2110   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2111 
2112   PetscFunctionBegin;
2113   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2114   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2115     /* because of the column compression in the off-processor part of the matrix a->B,
2116        the number of columns in a->B and b->B may be different, hence we cannot call
2117        the MatCopy() directly on the two parts. If need be, we can provide a more
2118        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2119        then copying the submatrices */
2120     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2121   } else {
2122     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2123     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2124   }
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatSetUp_MPIAIJ"
2130 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2131 {
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 /*
2140    Computes the number of nonzeros per row needed for preallocation when X and Y
2141    have different nonzero structure.
2142 */
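/* A small worked example (illustration only, not from the original source): if row i of X has
   global columns {0,2,5} and row i of Y has global columns {2,3} after mapping through
   xltog/yltog, the merged pattern is {0,2,3,5}, so nnz[i] = 4; each shared column is counted
   only once. */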
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2145 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2146 {
2147   PetscInt       i,j,k,nzx,nzy;
2148 
2149   PetscFunctionBegin;
2150   /* Set the number of nonzeros in the new matrix */
2151   for (i=0; i<m; i++) {
2152     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2153     nzx = xi[i+1] - xi[i];
2154     nzy = yi[i+1] - yi[i];
2155     nnz[i] = 0;
2156     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2157       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2158       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2159       nnz[i]++;
2160     }
2161     for (; k<nzy; k++) nnz[i]++;
2162   }
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2169 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2170 {
2171   PetscErrorCode ierr;
2172   PetscInt       m = Y->rmap->N;
2173   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2174   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPY_MPIAIJ"
2183 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2184 {
2185   PetscErrorCode ierr;
2186   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2187   PetscBLASInt   bnz,one=1;
2188   Mat_SeqAIJ     *x,*y;
2189 
2190   PetscFunctionBegin;
2191   if (str == SAME_NONZERO_PATTERN) {
2192     PetscScalar alpha = a;
2193     x    = (Mat_SeqAIJ*)xx->A->data;
2194     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2195     y    = (Mat_SeqAIJ*)yy->A->data;
2196     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2197     x    = (Mat_SeqAIJ*)xx->B->data;
2198     y    = (Mat_SeqAIJ*)yy->B->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2201     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2202   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2203     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2204   } else {
2205     Mat      B;
2206     PetscInt *nnz_d,*nnz_o;
2207     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2208     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2209     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2210     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2211     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2212     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2213     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2214     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2215     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2216     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2217     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2218     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2219     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2220     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2221   }
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatConjugate_MPIAIJ"
2229 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2230 {
2231 #if defined(PETSC_USE_COMPLEX)
2232   PetscErrorCode ierr;
2233   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2234 
2235   PetscFunctionBegin;
2236   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2237   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2238 #else
2239   PetscFunctionBegin;
2240 #endif
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatRealPart_MPIAIJ"
2246 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2247 {
2248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2249   PetscErrorCode ierr;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2253   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2259 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2260 {
2261   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2266   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #if defined(PETSC_HAVE_PBGL)
2271 
2272 #include <boost/parallel/mpi/bsp_process_group.hpp>
2273 #include <boost/graph/distributed/ilu_default_graph.hpp>
2274 #include <boost/graph/distributed/ilu_0_block.hpp>
2275 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2276 #include <boost/graph/distributed/petsc/interface.hpp>
2277 #include <boost/multi_array.hpp>
2278 #include <boost/parallel/distributed_property_map.hpp>
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2282 /*
2283   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2284 */
2285 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2286 {
2287   namespace petsc = boost::distributed::petsc;
2288 
2289   namespace graph_dist = boost::graph::distributed;
2290   using boost::graph::distributed::ilu_default::process_group_type;
2291   using boost::graph::ilu_permuted;
2292 
2293   PetscBool      row_identity, col_identity;
2294   PetscContainer c;
2295   PetscInt       m, n, M, N;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2300   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2301   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2302   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2303 
2304   process_group_type pg;
2305   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2306   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2307   lgraph_type& level_graph = *lgraph_p;
2308   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2309 
2310   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2311   ilu_permuted(level_graph);
2312 
2313   /* put together the new matrix */
2314   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2315   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2316   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2317   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2318   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2319   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2320   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2321   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2322 
2323   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2324   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2325   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2326   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2332 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2333 {
2334   PetscFunctionBegin;
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatSolve_MPIAIJ"
2340 /*
2341   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2342 */
2343 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2344 {
2345   namespace graph_dist = boost::graph::distributed;
2346 
2347   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2348   lgraph_type    *lgraph_p;
2349   PetscContainer c;
2350   PetscErrorCode ierr;
2351 
2352   PetscFunctionBegin;
2353   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2354   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2355   ierr = VecCopy(b, x);CHKERRQ(ierr);
2356 
2357   PetscScalar *array_x;
2358   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2359   PetscInt sx;
2360   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2361 
2362   PetscScalar *array_b;
2363   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2364   PetscInt sb;
2365   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2366 
2367   lgraph_type& level_graph = *lgraph_p;
2368   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2369 
2370   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2371   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2372   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2373 
2374   typedef boost::iterator_property_map<array_ref_type::iterator,
2375                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2376   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2377   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2378 
2379   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2380   PetscFunctionReturn(0);
2381 }
2382 #endif
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2386 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2387 {
2388   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2389   PetscErrorCode ierr;
2390   PetscInt       i,*idxb = 0;
2391   PetscScalar    *va,*vb;
2392   Vec            vtmp;
2393 
2394   PetscFunctionBegin;
2395   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2396   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2397   if (idx) {
2398     for (i=0; i<A->rmap->n; i++) {
2399       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2400     }
2401   }
2402 
2403   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2404   if (idx) {
2405     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2406   }
2407   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2408   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2409 
2410   for (i=0; i<A->rmap->n; i++) {
2411     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2412       va[i] = vb[i];
2413       if (idx) idx[i] = a->garray[idxb[i]];
2414     }
2415   }
2416 
2417   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2419   ierr = PetscFree(idxb);CHKERRQ(ierr);
2420   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2426 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2427 {
2428   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2429   PetscErrorCode ierr;
2430   PetscInt       i,*idxb = 0;
2431   PetscScalar    *va,*vb;
2432   Vec            vtmp;
2433 
2434   PetscFunctionBegin;
2435   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2436   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2437   if (idx) {
2438     for (i=0; i<A->rmap->n; i++) {
2439       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2440     }
2441   }
2442 
2443   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2444   if (idx) {
2445     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2446   }
2447   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2448   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2449 
2450   for (i=0; i<A->rmap->n; i++) {
2451     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2452       va[i] = vb[i];
2453       if (idx) idx[i] = a->garray[idxb[i]];
2454     }
2455   }
2456 
2457   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2458   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2459   ierr = PetscFree(idxb);CHKERRQ(ierr);
2460   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2466 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2467 {
2468   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2469   PetscInt       n      = A->rmap->n;
2470   PetscInt       cstart = A->cmap->rstart;
2471   PetscInt       *cmap  = mat->garray;
2472   PetscInt       *diagIdx, *offdiagIdx;
2473   Vec            diagV, offdiagV;
2474   PetscScalar    *a, *diagA, *offdiagA;
2475   PetscInt       r;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
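  /* The row minima of the diagonal and off-diagonal blocks are computed separately into sequential
     work vectors and merged entry by entry in the loop below; when the off-diagonal entry wins, its
     index is translated back to a global column number through garray. */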
2479   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2480   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2481   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2482   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2483   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2484   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2485   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2486   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2487   for (r = 0; r < n; ++r) {
2488     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2489       a[r]   = diagA[r];
2490       idx[r] = cstart + diagIdx[r];
2491     } else {
2492       a[r]   = offdiagA[r];
2493       idx[r] = cmap[offdiagIdx[r]];
2494     }
2495   }
2496   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2497   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2498   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2499   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2500   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2501   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2507 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2508 {
2509   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2510   PetscInt       n      = A->rmap->n;
2511   PetscInt       cstart = A->cmap->rstart;
2512   PetscInt       *cmap  = mat->garray;
2513   PetscInt       *diagIdx, *offdiagIdx;
2514   Vec            diagV, offdiagV;
2515   PetscScalar    *a, *diagA, *offdiagA;
2516   PetscInt       r;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2521   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2522   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2523   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2524   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2525   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2526   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2527   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2528   for (r = 0; r < n; ++r) {
2529     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2530       a[r]   = diagA[r];
2531       idx[r] = cstart + diagIdx[r];
2532     } else {
2533       a[r]   = offdiagA[r];
2534       idx[r] = cmap[offdiagIdx[r]];
2535     }
2536   }
2537   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2538   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2539   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2540   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2541   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2542   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2548 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2549 {
2550   PetscErrorCode ierr;
2551   Mat            *dummy;
2552 
2553   PetscFunctionBegin;
2554   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2555   *newmat = *dummy;
2556   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2562 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2563 {
2564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2574 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2575 {
2576   PetscErrorCode ierr;
2577   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2581   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2582   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2583   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2589 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2590 {
2591   PetscFunctionBegin;
2592   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2593   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2599 /*@
2600    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2601 
2602    Collective on Mat
2603 
2604    Input Parameters:
2605 +    A - the matrix
2606 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default the non-scalable algorithm is used)
2607 
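   A typical call, equivalent to setting the -mat_increase_overlap_scalable option handled in
   MatSetFromOptions_MPIAIJ() (minimal sketch; A is a MATMPIAIJ matrix):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve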
2608 @*/
2609 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2610 {
2611   PetscErrorCode       ierr;
2612 
2613   PetscFunctionBegin;
2614   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 #undef __FUNCT__
2619 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2620 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptions *PetscOptionsObject,Mat A)
2621 {
2622   PetscErrorCode       ierr;
2623   PetscBool            sc = PETSC_FALSE,flg;
2624 
2625   PetscFunctionBegin;
2626   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2627   ierr = PetscObjectOptionsBegin((PetscObject)A);
2628     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2629     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2630     if (flg) {
2631       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2632     }
2633   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 #undef __FUNCT__
2638 #define __FUNCT__ "MatShift_MPIAIJ"
2639 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2640 {
2641   PetscErrorCode ierr;
2642   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2643   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data,*bij = (Mat_SeqAIJ*)maij->B->data;
2644 
2645   PetscFunctionBegin;
2646   if (!aij->nz && !bij->nz) {
2647     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2648   }
2649   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 /* -------------------------------------------------------------------*/
2654 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2655                                        MatGetRow_MPIAIJ,
2656                                        MatRestoreRow_MPIAIJ,
2657                                        MatMult_MPIAIJ,
2658                                 /* 4*/ MatMultAdd_MPIAIJ,
2659                                        MatMultTranspose_MPIAIJ,
2660                                        MatMultTransposeAdd_MPIAIJ,
2661 #if defined(PETSC_HAVE_PBGL)
2662                                        MatSolve_MPIAIJ,
2663 #else
2664                                        0,
2665 #endif
2666                                        0,
2667                                        0,
2668                                 /*10*/ 0,
2669                                        0,
2670                                        0,
2671                                        MatSOR_MPIAIJ,
2672                                        MatTranspose_MPIAIJ,
2673                                 /*15*/ MatGetInfo_MPIAIJ,
2674                                        MatEqual_MPIAIJ,
2675                                        MatGetDiagonal_MPIAIJ,
2676                                        MatDiagonalScale_MPIAIJ,
2677                                        MatNorm_MPIAIJ,
2678                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2679                                        MatAssemblyEnd_MPIAIJ,
2680                                        MatSetOption_MPIAIJ,
2681                                        MatZeroEntries_MPIAIJ,
2682                                 /*24*/ MatZeroRows_MPIAIJ,
2683                                        0,
2684 #if defined(PETSC_HAVE_PBGL)
2685                                        0,
2686 #else
2687                                        0,
2688 #endif
2689                                        0,
2690                                        0,
2691                                 /*29*/ MatSetUp_MPIAIJ,
2692 #if defined(PETSC_HAVE_PBGL)
2693                                        0,
2694 #else
2695                                        0,
2696 #endif
2697                                        0,
2698                                        0,
2699                                        0,
2700                                 /*34*/ MatDuplicate_MPIAIJ,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                 /*39*/ MatAXPY_MPIAIJ,
2706                                        MatGetSubMatrices_MPIAIJ,
2707                                        MatIncreaseOverlap_MPIAIJ,
2708                                        MatGetValues_MPIAIJ,
2709                                        MatCopy_MPIAIJ,
2710                                 /*44*/ MatGetRowMax_MPIAIJ,
2711                                        MatScale_MPIAIJ,
2712                                        MatShift_MPIAIJ,
2713                                        MatDiagonalSet_MPIAIJ,
2714                                        MatZeroRowsColumns_MPIAIJ,
2715                                 /*49*/ MatSetRandom_MPIAIJ,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2721                                        0,
2722                                        MatSetUnfactored_MPIAIJ,
2723                                        MatPermute_MPIAIJ,
2724                                        0,
2725                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2726                                        MatDestroy_MPIAIJ,
2727                                        MatView_MPIAIJ,
2728                                        0,
2729                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2730                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2731                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2736                                        MatGetRowMinAbs_MPIAIJ,
2737                                        0,
2738                                        MatSetColoring_MPIAIJ,
2739                                        0,
2740                                        MatSetValuesAdifor_MPIAIJ,
2741                                 /*75*/ MatFDColoringApply_AIJ,
2742                                        MatSetFromOptions_MPIAIJ,
2743                                        0,
2744                                        0,
2745                                        MatFindZeroDiagonals_MPIAIJ,
2746                                 /*80*/ 0,
2747                                        0,
2748                                        0,
2749                                 /*83*/ MatLoad_MPIAIJ,
2750                                        0,
2751                                        0,
2752                                        0,
2753                                        0,
2754                                        0,
2755                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2756                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2757                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2758                                        MatPtAP_MPIAIJ_MPIAIJ,
2759                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2760                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2761                                        0,
2762                                        0,
2763                                        0,
2764                                        0,
2765                                 /*99*/ 0,
2766                                        0,
2767                                        0,
2768                                        MatConjugate_MPIAIJ,
2769                                        0,
2770                                 /*104*/MatSetValuesRow_MPIAIJ,
2771                                        MatRealPart_MPIAIJ,
2772                                        MatImaginaryPart_MPIAIJ,
2773                                        0,
2774                                        0,
2775                                 /*109*/0,
2776                                        0,
2777                                        MatGetRowMin_MPIAIJ,
2778                                        0,
2779                                        0,
2780                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2781                                        0,
2782                                        0,
2783                                        0,
2784                                        0,
2785                                 /*119*/0,
2786                                        0,
2787                                        0,
2788                                        0,
2789                                        MatGetMultiProcBlock_MPIAIJ,
2790                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2791                                        MatGetColumnNorms_MPIAIJ,
2792                                        MatInvertBlockDiagonal_MPIAIJ,
2793                                        0,
2794                                        MatGetSubMatricesMPI_MPIAIJ,
2795                                 /*129*/0,
2796                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2797                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2798                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2799                                        0,
2800                                 /*134*/0,
2801                                        0,
2802                                        0,
2803                                        0,
2804                                        0,
2805                                 /*139*/0,
2806                                        0,
2807                                        0,
2808                                        MatFDColoringSetUp_MPIXAIJ,
2809                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2810                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2811 };
2812 
2813 /* ----------------------------------------------------------------------------------------*/
2814 
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2817 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2818 {
2819   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2820   PetscErrorCode ierr;
2821 
2822   PetscFunctionBegin;
2823   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2824   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2830 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2831 {
2832   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2833   PetscErrorCode ierr;
2834 
2835   PetscFunctionBegin;
2836   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2837   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2843 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2844 {
2845   Mat_MPIAIJ     *b;
2846   PetscErrorCode ierr;
2847 
2848   PetscFunctionBegin;
2849   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2850   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2851   b = (Mat_MPIAIJ*)B->data;
2852 
2853   if (!B->preallocated) {
2854     /* Explicitly create 2 MATSEQAIJ matrices. */
2855     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2856     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2857     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2858     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2859     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2860     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2861     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2862     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2863     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2864     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2865   }
2866 
2867   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2868   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2869   B->preallocated = PETSC_TRUE;
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 #undef __FUNCT__
2874 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2875 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2876 {
2877   Mat            mat;
2878   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2879   PetscErrorCode ierr;
2880 
2881   PetscFunctionBegin;
2882   *newmat = 0;
2883   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2884   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2885   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2886   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2887   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2888   a       = (Mat_MPIAIJ*)mat->data;
2889 
2890   mat->factortype   = matin->factortype;
2891   mat->assembled    = PETSC_TRUE;
2892   mat->insertmode   = NOT_SET_VALUES;
2893   mat->preallocated = PETSC_TRUE;
2894 
2895   a->size         = oldmat->size;
2896   a->rank         = oldmat->rank;
2897   a->donotstash   = oldmat->donotstash;
2898   a->roworiented  = oldmat->roworiented;
2899   a->rowindices   = 0;
2900   a->rowvalues    = 0;
2901   a->getrowactive = PETSC_FALSE;
2902 
2903   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2904   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2905 
2906   if (oldmat->colmap) {
2907 #if defined(PETSC_USE_CTABLE)
2908     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2909 #else
2910     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2911     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2912     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2913 #endif
2914   } else a->colmap = 0;
2915   if (oldmat->garray) {
2916     PetscInt len;
2917     len  = oldmat->B->cmap->n;
2918     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2919     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2920     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2921   } else a->garray = 0;
2922 
2923   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2924   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2925   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2926   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2927   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2928   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2929   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2930   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2931   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2932   *newmat = mat;
2933   PetscFunctionReturn(0);
2934 }
2935 
2936 
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatLoad_MPIAIJ"
2940 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2941 {
2942   PetscScalar    *vals,*svals;
2943   MPI_Comm       comm;
2944   PetscErrorCode ierr;
2945   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2946   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2947   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2948   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2949   PetscInt       cend,cstart,n,*rowners;
2950   int            fd;
2951   PetscInt       bs = newMat->rmap->bs;
2952 
2953   PetscFunctionBegin;
2954   /* force binary viewer to load .info file if it has not yet done so */
2955   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2956   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2957   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2958   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2959   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2960   if (!rank) {
2961     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2962     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2963   }
2964 
2965   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2966   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2967   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2968   if (bs < 0) bs = 1;
2969 
2970   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2971   M    = header[1]; N = header[2];
2972 
2973   /* If global sizes are set, check if they are consistent with that given in the file */
2974   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2975   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2976 
2977   /* determine ownership of all (block) rows */
2978   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2979   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2980   else m = newMat->rmap->n; /* Set by user */
2981 
2982   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2983   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2984 
2985   /* First process needs enough room for process with most rows */
2986   if (!rank) {
2987     mmax = rowners[1];
2988     for (i=2; i<=size; i++) {
2989       mmax = PetscMax(mmax, rowners[i]);
2990     }
2991   } else mmax = -1;             /* unused, but compilers complain */
2992 
2993   rowners[0] = 0;
2994   for (i=2; i<=size; i++) {
2995     rowners[i] += rowners[i-1];
2996   }
2997   rstart = rowners[rank];
2998   rend   = rowners[rank+1];
2999 
3000   /* distribute row lengths to all processors */
3001   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3002   if (!rank) {
3003     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3004     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3005     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3006     for (j=0; j<m; j++) {
3007       procsnz[0] += ourlens[j];
3008     }
3009     for (i=1; i<size; i++) {
3010       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3011       /* calculate the number of nonzeros on each processor */
3012       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3013         procsnz[i] += rowlengths[j];
3014       }
3015       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3016     }
3017     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3018   } else {
3019     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3020   }
3021 
3022   if (!rank) {
3023     /* determine max buffer needed and allocate it */
3024     maxnz = 0;
3025     for (i=0; i<size; i++) {
3026       maxnz = PetscMax(maxnz,procsnz[i]);
3027     }
3028     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3029 
3030     /* read in my part of the matrix column indices  */
3031     nz   = procsnz[0];
3032     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3033     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3034 
3035     /* read in everyone else's and ship off */
3036     for (i=1; i<size; i++) {
3037       nz   = procsnz[i];
3038       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3039       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3040     }
3041     ierr = PetscFree(cols);CHKERRQ(ierr);
3042   } else {
3043     /* determine buffer space needed for message */
3044     nz = 0;
3045     for (i=0; i<m; i++) {
3046       nz += ourlens[i];
3047     }
3048     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3049 
3050     /* receive message of column indices*/
3051     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3052   }
3053 
3054   /* determine column ownership if matrix is not square */
3055   if (N != M) {
3056     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3057     else n = newMat->cmap->n;
3058     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3059     cstart = cend - n;
3060   } else {
3061     cstart = rstart;
3062     cend   = rend;
3063     n      = cend - cstart;
3064   }
3065 
3066   /* loop over local rows, determining number of off diagonal entries */
3067   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3068   jj   = 0;
3069   for (i=0; i<m; i++) {
3070     for (j=0; j<ourlens[i]; j++) {
3071       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3072       jj++;
3073     }
3074   }
3075 
3076   for (i=0; i<m; i++) {
3077     ourlens[i] -= offlens[i];
3078   }
3079   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3080 
3081   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3082 
3083   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3084 
3085   for (i=0; i<m; i++) {
3086     ourlens[i] += offlens[i];
3087   }
3088 
3089   if (!rank) {
3090     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3091 
3092     /* read in my part of the matrix numerical values  */
3093     nz   = procsnz[0];
3094     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3095 
3096     /* insert into matrix */
3097     jj      = rstart;
3098     smycols = mycols;
3099     svals   = vals;
3100     for (i=0; i<m; i++) {
3101       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3102       smycols += ourlens[i];
3103       svals   += ourlens[i];
3104       jj++;
3105     }
3106 
3107     /* read in other processors and ship out */
3108     for (i=1; i<size; i++) {
3109       nz   = procsnz[i];
3110       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3111       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3112     }
3113     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3114   } else {
3115     /* receive numeric values */
3116     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3117 
3118     /* receive message of values*/
3119     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3120 
3121     /* insert into matrix */
3122     jj      = rstart;
3123     smycols = mycols;
3124     svals   = vals;
3125     for (i=0; i<m; i++) {
3126       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3127       smycols += ourlens[i];
3128       svals   += ourlens[i];
3129       jj++;
3130     }
3131   }
3132   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3133   ierr = PetscFree(vals);CHKERRQ(ierr);
3134   ierr = PetscFree(mycols);CHKERRQ(ierr);
3135   ierr = PetscFree(rowners);CHKERRQ(ierr);
3136   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3137   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3138   PetscFunctionReturn(0);
3139 }
3140 
3141 #undef __FUNCT__
3142 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3143 /* TODO: Not scalable because of ISAllGather(). */
3144 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3145 {
3146   PetscErrorCode ierr;
3147   IS             iscol_local;
3148   PetscInt       csize;
3149 
3150   PetscFunctionBegin;
3151   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3152   if (call == MAT_REUSE_MATRIX) {
3153     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3154     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3155   } else {
3156     PetscInt cbs;
3157     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3158     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3159     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3160   }
3161   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3162   if (call == MAT_INITIAL_MATRIX) {
3163     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3164     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3165   }
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3170 #undef __FUNCT__
3171 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3172 /*
3173     Not great since it makes two copies of the submatrix: first a SeqAIJ
3174   is built locally, and then the final result is formed by concatenating the
3175   local matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3176 
3177   Note: This requires a sequential iscol with all indices.
3178 */
3179 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3180 {
3181   PetscErrorCode ierr;
3182   PetscMPIInt    rank,size;
3183   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3184   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3185   PetscBool      allcolumns, colflag;
3186   Mat            M,Mreuse;
3187   MatScalar      *vwork,*aa;
3188   MPI_Comm       comm;
3189   Mat_SeqAIJ     *aij;
3190 
3191   PetscFunctionBegin;
3192   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3193   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3195 
3196   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3197   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3198   if (colflag && ncol == mat->cmap->N) {
3199     allcolumns = PETSC_TRUE;
3200   } else {
3201     allcolumns = PETSC_FALSE;
3202   }
3203   if (call ==  MAT_REUSE_MATRIX) {
3204     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3205     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3206     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3207   } else {
3208     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3209   }
3210 
3211   /*
3212       m - number of local rows
3213       n - number of columns (same on all processors)
3214       rstart - first row in new global matrix generated
3215   */
3216   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3217   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3218   if (call == MAT_INITIAL_MATRIX) {
3219     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3220     ii  = aij->i;
3221     jj  = aij->j;
3222 
3223     /*
3224         Determine the number of non-zeros in the diagonal and off-diagonal
3225         portions of the matrix in order to do correct preallocation
3226     */
3227 
3228     /* first get start and end of "diagonal" columns */
3229     if (csize == PETSC_DECIDE) {
3230       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3231       if (mglobal == n) { /* square matrix */
3232         nlocal = m;
3233       } else {
3234         nlocal = n/size + ((n % size) > rank);
3235       }
3236     } else {
3237       nlocal = csize;
3238     }
3239     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3240     rstart = rend - nlocal;
3241     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3242 
3243     /* next, compute all the lengths */
3244     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3245     olens = dlens + m;
3246     for (i=0; i<m; i++) {
3247       jend = ii[i+1] - ii[i];
3248       olen = 0;
3249       dlen = 0;
3250       for (j=0; j<jend; j++) {
3251         if (*jj < rstart || *jj >= rend) olen++;
3252         else dlen++;
3253         jj++;
3254       }
3255       olens[i] = olen;
3256       dlens[i] = dlen;
3257     }
3258     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3259     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3260     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3261     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3262     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3263     ierr = PetscFree(dlens);CHKERRQ(ierr);
3264   } else {
3265     PetscInt ml,nl;
3266 
3267     M    = *newmat;
3268     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3269     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3270     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3271     /*
3272          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3273        rather than the slower MatSetValues().
3274     */
3275     M->was_assembled = PETSC_TRUE;
3276     M->assembled     = PETSC_FALSE;
3277   }
3278   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3279   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3280   ii   = aij->i;
3281   jj   = aij->j;
3282   aa   = aij->a;
3283   for (i=0; i<m; i++) {
3284     row   = rstart + i;
3285     nz    = ii[i+1] - ii[i];
3286     cwork = jj;     jj += nz;
3287     vwork = aa;     aa += nz;
3288     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3289   }
3290 
3291   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3292   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3293   *newmat = M;
3294 
3295   /* save submatrix used in processor for next request */
3296   if (call ==  MAT_INITIAL_MATRIX) {
3297     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3298     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3299   }
3300   PetscFunctionReturn(0);
3301 }
3302 
3303 #undef __FUNCT__
3304 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3305 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3306 {
3307   PetscInt       m,cstart, cend,j,nnz,i,d;
3308   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3309   const PetscInt *JJ;
3310   PetscScalar    *values;
3311   PetscErrorCode ierr;
3312 
3313   PetscFunctionBegin;
3314   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3315 
3316   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3317   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3318   m      = B->rmap->n;
3319   cstart = B->cmap->rstart;
3320   cend   = B->cmap->rend;
3321   rstart = B->rmap->rstart;
3322 
3323   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3324 
3325 #if defined(PETSC_USE_DEBUG)
3326   for (i=0; i<m; i++) {
3327     nnz = Ii[i+1]- Ii[i];
3328     JJ  = J + Ii[i];
3329     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3330     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3331     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3332   }
3333 #endif
3334 
3335   for (i=0; i<m; i++) {
3336     nnz     = Ii[i+1]- Ii[i];
3337     JJ      = J + Ii[i];
3338     nnz_max = PetscMax(nnz_max,nnz);
3339     d       = 0;
3340     for (j=0; j<nnz; j++) {
3341       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3342     }
3343     d_nnz[i] = d;
3344     o_nnz[i] = nnz - d;
3345   }
3346   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3347   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3348 
3349   if (v) values = (PetscScalar*)v;
3350   else {
3351     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3352   }
3353 
3354   for (i=0; i<m; i++) {
3355     ii   = i + rstart;
3356     nnz  = Ii[i+1]- Ii[i];
3357     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3358   }
3359   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3360   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3361 
3362   if (!v) {
3363     ierr = PetscFree(values);CHKERRQ(ierr);
3364   }
3365   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3366   PetscFunctionReturn(0);
3367 }
3368 
3369 #undef __FUNCT__
3370 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3371 /*@
3372    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3373    (the default parallel PETSc format).
3374 
3375    Collective on MPI_Comm
3376 
3377    Input Parameters:
3378 +  B - the matrix
3379 .  i - the indices into j for the start of each local row (starts with zero)
3380 .  j - the column indices for each local row (starts with zero)
3381 -  v - optional values in the matrix
3382 
3383    Level: developer
3384 
3385    Notes:
3386        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3387      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3388      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3389 
3390        The i and j indices are 0 based, and the entries of i are offsets into the local j (and v) arrays.
3391 
3392        The format used for the sparse matrix input is equivalent to a
3393     row-major ordering, i.e., for the following matrix, the input data expected is
3394     as shown:
3395 
3396         1 0 0
3397         2 0 3     P0
3398        -------
3399         4 5 6     P1
3400 
3401      Process0 [P0]: rows_owned=[0,1]
3402         i =  {0,1,3}  [size = nrow+1  = 2+1]
3403         j =  {0,0,2}  [size = 3]
3404         v =  {1,2,3}  [size = 3]
3405 
3406      Process1 [P1]: rows_owned=[2]
3407         i =  {0,3}    [size = nrow+1  = 1+1]
3408         j =  {0,1,2}  [size = 3]
3409         v =  {4,5,6}  [size = 3]
3410 
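   A minimal calling sketch for the example above (an illustrative sketch only; error checking
   is omitted and m,n denote this process's local row and column counts, e.g. 2,2 on P0 and 1,1 on P1):
.vb
   Mat B;
   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(B,i,j,v);   /* i,j,v are the local CSR arrays shown above; also fills and assembles B when v is provided */
.ve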
3411 .keywords: matrix, aij, compressed row, sparse, parallel
3412 
3413 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3414           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3415 @*/
3416 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3417 {
3418   PetscErrorCode ierr;
3419 
3420   PetscFunctionBegin;
3421   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3422   PetscFunctionReturn(0);
3423 }
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3427 /*@C
3428    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3429    (the default parallel PETSc format).  For good matrix assembly performance
3430    the user should preallocate the matrix storage by setting the parameters
3431    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3432    performance can be increased by more than a factor of 50.
3433 
3434    Collective on MPI_Comm
3435 
3436    Input Parameters:
3437 +  B - the matrix
3438 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3439            (same value is used for all local rows)
3440 .  d_nnz - array containing the number of nonzeros in the various rows of the
3441            DIAGONAL portion of the local submatrix (possibly different for each row)
3442            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3443            The size of this array is equal to the number of local rows, i.e 'm'.
3444            For matrices that will be factored, you must leave room for (and set)
3445            the diagonal entry even if it is zero.
3446 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3447            submatrix (same value is used for all local rows).
3448 -  o_nnz - array containing the number of nonzeros in the various rows of the
3449            OFF-DIAGONAL portion of the local submatrix (possibly different for
3450            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3451            structure. The size of this array is equal to the number
3452            of local rows, i.e 'm'.
3453 
3454    If the *_nnz parameter is given then the *_nz parameter is ignored
3455 
3456    The AIJ format (also called the Yale sparse matrix format or
3457    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3458    storage.  The stored row and column indices begin with zero.
3459    See Users-Manual: ch_mat for details.
3460 
3461    The parallel matrix is partitioned such that the first m0 rows belong to
3462    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3463    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3464 
3465    The DIAGONAL portion of the local submatrix of a processor can be defined
3466    as the submatrix obtained by extracting the part corresponding to
3467    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3468    first row that belongs to the processor, r2 is the last row belonging to
3469    this processor, and c1-c2 is the range of indices of the local part of a
3470    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3471    common case of a square matrix, the row and column ranges are the same and
3472    the DIAGONAL part is also square. The remaining portion of the local
3473    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3474 
3475    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3476 
3477    You can call MatGetInfo() to get information on how effective the preallocation was;
3478    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3479    You can also run with the option -info and look for messages with the string
3480    malloc in them to see if additional memory allocation was needed.
3481 
3482    Example usage:
3483 
3484    Consider the following 8x8 matrix with 34 non-zero values, that is
3485    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3486    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3487    as follows:
3488 
3489 .vb
3490             1  2  0  |  0  3  0  |  0  4
3491     Proc0   0  5  6  |  7  0  0  |  8  0
3492             9  0 10  | 11  0  0  | 12  0
3493     -------------------------------------
3494            13  0 14  | 15 16 17  |  0  0
3495     Proc1   0 18  0  | 19 20 21  |  0  0
3496             0  0  0  | 22 23  0  | 24  0
3497     -------------------------------------
3498     Proc2  25 26 27  |  0  0 28  | 29  0
3499            30  0  0  | 31 32 33  |  0 34
3500 .ve
3501 
3502    This can be represented as a collection of submatrices as:
3503 
3504 .vb
3505       A B C
3506       D E F
3507       G H I
3508 .ve
3509 
3510    Where the submatrices A,B,C are owned by proc0, D,E,F are
3511    owned by proc1, G,H,I are owned by proc2.
3512 
3513    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3514    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3515    The 'M','N' parameters are 8,8, and have the same values on all procs.
3516 
3517    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3518    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3519    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3520    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3521    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3522    matrix, and [DF] as another SeqAIJ matrix.
3523 
3524    When d_nz, o_nz parameters are specified, d_nz storage elements are
3525    allocated for every row of the local diagonal submatrix, and o_nz
3526    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3527    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3528    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3529    In this case, the values of d_nz,o_nz are:
3530 .vb
3531      proc0 : dnz = 2, o_nz = 2
3532      proc1 : dnz = 3, o_nz = 2
3533      proc2 : dnz = 1, o_nz = 4
3534 .ve
3535    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3536    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3537    for proc3. i.e we are using 12+15+10=37 storage locations to store
3538    34 values.
3539 
3540    When d_nnz, o_nnz parameters are specified, the storage is specified
3541    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3542    In the above case the values for d_nnz,o_nnz are:
3543 .vb
3544      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3545      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3546      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3547 .ve
3548    Here the space allocated is the sum of all the above values, i.e., 34, and
3549    hence the preallocation is perfect.
3550 
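   A minimal usage sketch for proc0 of the example above (illustrative only; error checking is
   omitted and the per-row counts are the d_nnz/o_nnz values listed for proc0):
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,3,3,8,8);                      /* 3 local rows/columns of the 8x8 matrix */
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve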
3551    Level: intermediate
3552 
3553 .keywords: matrix, aij, compressed row, sparse, parallel
3554 
3555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3556           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3557 @*/
3558 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3559 {
3560   PetscErrorCode ierr;
3561 
3562   PetscFunctionBegin;
3563   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3564   PetscValidType(B,1);
3565   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 #undef __FUNCT__
3570 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3571 /*@
3572      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3573          CSR format the local rows.
3574 
3575    Collective on MPI_Comm
3576 
3577    Input Parameters:
3578 +  comm - MPI communicator
3579 .  m - number of local rows (Cannot be PETSC_DECIDE)
3580 .  n - This value should be the same as the local size used in creating the
3581        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3582        calculated if N is given) For square matrices n is almost always m.
3583 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3584 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3585 .   i - row indices
3586 .   j - column indices
3587 -   a - matrix values
3588 
3589    Output Parameter:
3590 .   mat - the matrix
3591 
3592    Level: intermediate
3593 
3594    Notes:
3595        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3596      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3597      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3598 
3599        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
3600 
3601        The format used for the sparse matrix input is equivalent to a
3602     row-major ordering, i.e., for the following matrix, the input data expected is
3603     as shown:
3604 
3605         1 0 0
3606         2 0 3     P0
3607        -------
3608         4 5 6     P1
3609 
3610      Process0 [P0]: rows_owned=[0,1]
3611         i =  {0,1,3}  [size = nrow+1  = 2+1]
3612         j =  {0,0,2}  [size = 3]
3613         v =  {1,2,3}  [size = 3]
3614 
3615      Process1 [P1]: rows_owned=[2]
3616         i =  {0,3}    [size = nrow+1  = 1+1]
3617         j =  {0,1,2}  [size = 3]
3618         v =  {4,5,6}  [size = 3]
3619 
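   A minimal calling sketch matching the example above (illustrative only; i, j, and a hold each
   process's local CSR arrays and m,n are that process's local row and column counts):
.vb
   Mat A;
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve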
3620 .keywords: matrix, aij, compressed row, sparse, parallel
3621 
3622 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3623           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3624 @*/
3625 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3626 {
3627   PetscErrorCode ierr;
3628 
3629   PetscFunctionBegin;
3630   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3631   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3632   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3633   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3634   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3635   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3636   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3637   PetscFunctionReturn(0);
3638 }
3639 
3640 #undef __FUNCT__
3641 #define __FUNCT__ "MatCreateAIJ"
3642 /*@C
3643    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3644    (the default parallel PETSc format).  For good matrix assembly performance
3645    the user should preallocate the matrix storage by setting the parameters
3646    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3647    performance can be increased by more than a factor of 50.
3648 
3649    Collective on MPI_Comm
3650 
3651    Input Parameters:
3652 +  comm - MPI communicator
3653 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3654            This value should be the same as the local size used in creating the
3655            y vector for the matrix-vector product y = Ax.
3656 .  n - This value should be the same as the local size used in creating the
3657        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3658        calculated if N is given) For square matrices n is almost always m.
3659 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3660 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3661 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3662            (same value is used for all local rows)
3663 .  d_nnz - array containing the number of nonzeros in the various rows of the
3664            DIAGONAL portion of the local submatrix (possibly different for each row)
3665            or NULL, if d_nz is used to specify the nonzero structure.
3666            The size of this array is equal to the number of local rows, i.e 'm'.
3667 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3668            submatrix (same value is used for all local rows).
3669 -  o_nnz - array containing the number of nonzeros in the various rows of the
3670            OFF-DIAGONAL portion of the local submatrix (possibly different for
3671            each row) or NULL, if o_nz is used to specify the nonzero
3672            structure. The size of this array is equal to the number
3673            of local rows, i.e 'm'.
3674 
3675    Output Parameter:
3676 .  A - the matrix
3677 
3678    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3679    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3680    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3681 
3682    Notes:
3683    If the *_nnz parameter is given then the *_nz parameter is ignored
3684 
3685    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3686    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3687    storage requirements for this matrix.
3688 
3689    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3690    processor than it must be used on all processors that share the object for
3691    that argument.
3692 
3693    The user MUST specify either the local or global matrix dimensions
3694    (possibly both).
3695 
3696    The parallel matrix is partitioned across processors such that the
3697    first m0 rows belong to process 0, the next m1 rows belong to
3698    process 1, the next m2 rows belong to process 2 etc.. where
3699    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3700    values corresponding to an [m x N] submatrix.
3701 
3702    The columns are logically partitioned with the n0 columns belonging
3703    to 0th partition, the next n1 columns belonging to the next
3704    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3705 
3706    The DIAGONAL portion of the local submatrix on any given processor
3707    is the submatrix corresponding to the rows and columns m,n
3708    corresponding to the given processor. i.e diagonal matrix on
3709    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3710    etc. The remaining portion of the local submatrix [m x (N-n)]
3711    constitute the OFF-DIAGONAL portion. The example below better
3712    illustrates this concept.
3713 
3714    For a square global matrix we define each processor's diagonal portion
3715    to be its local rows and the corresponding columns (a square submatrix);
3716    each processor's off-diagonal portion encompasses the remainder of the
3717    local matrix (a rectangular submatrix).
3718 
3719    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3720 
3721    When calling this routine with a single process communicator, a matrix of
3722    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3723    type of communicator, use the construction mechanism:
3724      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3725 
3726    By default, this format uses inodes (identical nodes) when possible.
3727    We search for consecutive rows with the same nonzero structure, thereby
3728    reusing matrix information to achieve increased efficiency.
3729 
3730    Options Database Keys:
3731 +  -mat_no_inode  - Do not use inodes
3732 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3733 -  -mat_aij_oneindex - Internally use indexing starting at 1
3734         rather than 0.  Note that when calling MatSetValues(),
3735         the user still MUST index entries starting at 0!
3736 
3737 
3738    Example usage:
3739 
3740    Consider the following 8x8 matrix with 34 non-zero values, that is
3741    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3742    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3743    as follows:
3744 
3745 .vb
3746             1  2  0  |  0  3  0  |  0  4
3747     Proc0   0  5  6  |  7  0  0  |  8  0
3748             9  0 10  | 11  0  0  | 12  0
3749     -------------------------------------
3750            13  0 14  | 15 16 17  |  0  0
3751     Proc1   0 18  0  | 19 20 21  |  0  0
3752             0  0  0  | 22 23  0  | 24  0
3753     -------------------------------------
3754     Proc2  25 26 27  |  0  0 28  | 29  0
3755            30  0  0  | 31 32 33  |  0 34
3756 .ve
3757 
3758    This can be represented as a collection of submatrices as:
3759 
3760 .vb
3761       A B C
3762       D E F
3763       G H I
3764 .ve
3765 
3766    Where the submatrices A,B,C are owned by proc0, D,E,F are
3767    owned by proc1, G,H,I are owned by proc2.
3768 
3769    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3770    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3771    The 'M','N' parameters are 8,8, and have the same values on all procs.
3772 
3773    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3774    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3775    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3776    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3777    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3778    matrix, and [DF] as another SeqAIJ matrix.
3779 
3780    When d_nz, o_nz parameters are specified, d_nz storage elements are
3781    allocated for every row of the local diagonal submatrix, and o_nz
3782    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3783    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3784    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3785    In this case, the values of d_nz,o_nz are:
3786 .vb
3787      proc0 : dnz = 2, o_nz = 2
3788      proc1 : dnz = 3, o_nz = 2
3789      proc2 : dnz = 1, o_nz = 4
3790 .ve
3791    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3792    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3793    for proc3. i.e we are using 12+15+10=37 storage locations to store
3794    34 values.
3795 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
3799 .vb
3800      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3801      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3802      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3803 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is exact.
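
   For example, a minimal calling sequence might look like the sketch below, where
   m and n are the local sizes chosen by the caller and the preallocation values
   5 and 2 are illustrative only and should be replaced by application-specific estimates:
.vb
     Mat A;
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);CHKERRQ(ierr);
     /* insert entries with MatSetValues(), then assemble */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve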
3806 
3807    Level: intermediate
3808 
3809 .keywords: matrix, aij, compressed row, sparse, parallel
3810 
3811 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3812           MPIAIJ, MatCreateMPIAIJWithArrays()
3813 @*/
3814 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3815 {
3816   PetscErrorCode ierr;
3817   PetscMPIInt    size;
3818 
3819   PetscFunctionBegin;
3820   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3821   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3823   if (size > 1) {
3824     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3825     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3826   } else {
3827     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3828     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3829   }
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 #undef __FUNCT__
3834 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
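/*
   MatMPIAIJGetSeqAIJ - returns the diagonal part (Ad) and the off-diagonal part (Ao) of an
   MPIAIJ matrix as SeqAIJ matrices, along with the map (colmap) from the local column indices
   of Ao to the corresponding global column indices; any of the output arguments may be NULL.
*/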
3835 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3836 {
3837   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3838 
3839   PetscFunctionBegin;
3840   if (Ad)     *Ad     = a->A;
3841   if (Ao)     *Ao     = a->B;
3842   if (colmap) *colmap = a->garray;
3843   PetscFunctionReturn(0);
3844 }
3845 
3846 #undef __FUNCT__
3847 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3848 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3849 {
3850   PetscErrorCode ierr;
3851   PetscInt       i;
3852   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3853 
3854   PetscFunctionBegin;
3855   if (coloring->ctype == IS_COLORING_GLOBAL) {
3856     ISColoringValue *allcolors,*colors;
3857     ISColoring      ocoloring;
3858 
3859     /* set coloring for diagonal portion */
3860     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3861 
3862     /* set coloring for off-diagonal portion */
3863     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3864     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3865     for (i=0; i<a->B->cmap->n; i++) {
3866       colors[i] = allcolors[a->garray[i]];
3867     }
3868     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3869     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3870     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3871     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3872   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3873     ISColoringValue *colors;
3874     PetscInt        *larray;
3875     ISColoring      ocoloring;
3876 
3877     /* set coloring for diagonal portion */
3878     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3879     for (i=0; i<a->A->cmap->n; i++) {
3880       larray[i] = i + A->cmap->rstart;
3881     }
3882     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3883     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3884     for (i=0; i<a->A->cmap->n; i++) {
3885       colors[i] = coloring->colors[larray[i]];
3886     }
3887     ierr = PetscFree(larray);CHKERRQ(ierr);
3888     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3889     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3890     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3891 
3892     /* set coloring for off-diagonal portion */
3893     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3894     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3895     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3896     for (i=0; i<a->B->cmap->n; i++) {
3897       colors[i] = coloring->colors[larray[i]];
3898     }
3899     ierr = PetscFree(larray);CHKERRQ(ierr);
3900     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3901     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3902     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3903   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3904   PetscFunctionReturn(0);
3905 }
3906 
3907 #undef __FUNCT__
3908 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3909 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3910 {
3911   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3912   PetscErrorCode ierr;
3913 
3914   PetscFunctionBegin;
3915   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3916   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3917   PetscFunctionReturn(0);
3918 }
3919 
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
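/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the rows of the sequential matrix inmat
   supplied by each process of comm into a single parallel MPIAIJ matrix *outmat; n is the
   number of local columns (or PETSC_DECIDE). With MAT_INITIAL_MATRIX the nonzero structure
   is determined and preallocated, otherwise the existing *outmat is refilled with new values.
*/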
3922 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3923 {
3924   PetscErrorCode ierr;
3925   PetscInt       m,N,i,rstart,nnz,Ii;
3926   PetscInt       *indx;
3927   PetscScalar    *values;
3928 
3929   PetscFunctionBegin;
3930   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3931   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3932     PetscInt       *dnz,*onz,sum,bs,cbs;
3933 
3934     if (n == PETSC_DECIDE) {
3935       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3936     }
3937     /* Check sum(n) = N */
3938     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3939     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3940 
3941     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3942     rstart -= m;
3943 
3944     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3945     for (i=0; i<m; i++) {
3946       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3947       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3948       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3949     }
3950 
3951     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3952     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3953     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3954     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3955     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3956     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3957     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3958   }
3959 
3960   /* numeric phase */
3961   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3962   for (i=0; i<m; i++) {
3963     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3964     Ii   = i + rstart;
3965     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3966     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3967   }
3968   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3969   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatFileSplit"
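/*
   MatFileSplit - gathers each process's local rows of A into a sequential matrix and appends
   it to the binary file <outfile>.<rank> on that process.
*/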
3975 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3976 {
3977   PetscErrorCode    ierr;
3978   PetscMPIInt       rank;
3979   PetscInt          m,N,i,rstart,nnz;
3980   size_t            len;
3981   const PetscInt    *indx;
3982   PetscViewer       out;
3983   char              *name;
3984   Mat               B;
3985   const PetscScalar *values;
3986 
3987   PetscFunctionBegin;
3988   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3989   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3990   /* Should this be the type of the diagonal block of A? */
3991   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3992   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3993   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3994   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3995   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3996   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3997   for (i=0; i<m; i++) {
3998     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3999     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4000     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4001   }
4002   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4003   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4004 
4005   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4006   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4007   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4008   sprintf(name,"%s.%d",outfile,rank);
4009   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4010   ierr = PetscFree(name);CHKERRQ(ierr);
4011   ierr = MatView(B,out);CHKERRQ(ierr);
4012   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4013   ierr = MatDestroy(&B);CHKERRQ(ierr);
4014   PetscFunctionReturn(0);
4015 }
4016 
4017 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4018 #undef __FUNCT__
4019 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
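/*
   MatDestroy_MPIAIJ_SeqsToMPI - frees the Mat_Merge_SeqsToMPI support data attached to the
   matrix under "MatMergeSeqsToMPI" (if present) and then destroys the matrix with
   MatDestroy_MPIAIJ().
*/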
4020 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4021 {
4022   PetscErrorCode      ierr;
4023   Mat_Merge_SeqsToMPI *merge;
4024   PetscContainer      container;
4025 
4026   PetscFunctionBegin;
4027   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4028   if (container) {
4029     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4030     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4031     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4032     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4033     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4034     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4035     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4036     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4037     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4038     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4039     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4040     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4041     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4042     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4043     ierr = PetscFree(merge);CHKERRQ(ierr);
4044     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4045   }
4046   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4047   PetscFunctionReturn(0);
4048 }
4049 
4050 #include <../src/mat/utils/freespace.h>
4051 #include <petscbt.h>
4052 
4053 #undef __FUNCT__
4054 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
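/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ(): communicates
   the off-process rows of seqmat and sums the local and received values into the parallel
   matrix mpimat created by MatCreateMPIAIJSumSeqAIJSymbolic().
*/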
4055 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4056 {
4057   PetscErrorCode      ierr;
4058   MPI_Comm            comm;
4059   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4060   PetscMPIInt         size,rank,taga,*len_s;
4061   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4062   PetscInt            proc,m;
4063   PetscInt            **buf_ri,**buf_rj;
4064   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4065   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4066   MPI_Request         *s_waits,*r_waits;
4067   MPI_Status          *status;
4068   MatScalar           *aa=a->a;
4069   MatScalar           **abuf_r,*ba_i;
4070   Mat_Merge_SeqsToMPI *merge;
4071   PetscContainer      container;
4072 
4073   PetscFunctionBegin;
4074   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4075   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4076 
4077   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4078   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4079 
4080   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4081   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4082 
4083   bi     = merge->bi;
4084   bj     = merge->bj;
4085   buf_ri = merge->buf_ri;
4086   buf_rj = merge->buf_rj;
4087 
4088   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4089   owners = merge->rowmap->range;
4090   len_s  = merge->len_s;
4091 
4092   /* send and recv matrix values */
4093   /*-----------------------------*/
4094   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4095   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4096 
4097   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4098   for (proc=0,k=0; proc<size; proc++) {
4099     if (!len_s[proc]) continue;
4100     i    = owners[proc];
4101     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4102     k++;
4103   }
4104 
4105   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4106   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4107   ierr = PetscFree(status);CHKERRQ(ierr);
4108 
4109   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4110   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4111 
4112   /* insert mat values of mpimat */
4113   /*----------------------------*/
4114   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4115   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4116 
4117   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4122   }
4123 
4124   /* set values of ba */
4125   m = merge->rowmap->n;
4126   for (i=0; i<m; i++) {
4127     arow = owners[rank] + i;
4128     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4129     bnzi = bi[i+1] - bi[i];
4130     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4131 
4132     /* add local non-zero vals of this proc's seqmat into ba */
4133     anzi   = ai[arow+1] - ai[arow];
4134     aj     = a->j + ai[arow];
4135     aa     = a->a + ai[arow];
4136     nextaj = 0;
4137     for (j=0; nextaj<anzi; j++) {
4138       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4139         ba_i[j] += aa[nextaj++];
4140       }
4141     }
4142 
4143     /* add received vals into ba */
4144     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4145       /* i-th row */
4146       if (i == *nextrow[k]) {
4147         anzi   = *(nextai[k]+1) - *nextai[k];
4148         aj     = buf_rj[k] + *(nextai[k]);
4149         aa     = abuf_r[k] + *(nextai[k]);
4150         nextaj = 0;
4151         for (j=0; nextaj<anzi; j++) {
4152           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4153             ba_i[j] += aa[nextaj++];
4154           }
4155         }
4156         nextrow[k]++; nextai[k]++;
4157       }
4158     }
4159     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4160   }
4161   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4162   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4163 
4164   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4165   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4166   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4167   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4168   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4169   PetscFunctionReturn(0);
4170 }
4171 
4172 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4173 
4174 #undef __FUNCT__
4175 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
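/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines
   the nonzero structure of the merged parallel matrix, preallocates it, and attaches the
   Mat_Merge_SeqsToMPI container ("MatMergeSeqsToMPI") that the numeric phase reuses.
*/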
4176 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4177 {
4178   PetscErrorCode      ierr;
4179   Mat                 B_mpi;
4180   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4181   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4182   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4183   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4184   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4185   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4186   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4187   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4188   MPI_Status          *status;
4189   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4190   PetscBT             lnkbt;
4191   Mat_Merge_SeqsToMPI *merge;
4192   PetscContainer      container;
4193 
4194   PetscFunctionBegin;
4195   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4196 
4197   /* make sure it is a PETSc comm */
4198   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4201 
4202   ierr = PetscNew(&merge);CHKERRQ(ierr);
4203   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4204 
4205   /* determine row ownership */
4206   /*---------------------------------------------------------*/
4207   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4208   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4209   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4210   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4211   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4212   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4213   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4214 
4215   m      = merge->rowmap->n;
4216   owners = merge->rowmap->range;
4217 
4218   /* determine the number of messages to send, their lengths */
4219   /*---------------------------------------------------------*/
4220   len_s = merge->len_s;
4221 
4222   len          = 0; /* length of buf_si[] */
4223   merge->nsend = 0;
4224   for (proc=0; proc<size; proc++) {
4225     len_si[proc] = 0;
4226     if (proc == rank) {
4227       len_s[proc] = 0;
4228     } else {
4229       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4230       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4231     }
4232     if (len_s[proc]) {
4233       merge->nsend++;
4234       nrows = 0;
4235       for (i=owners[proc]; i<owners[proc+1]; i++) {
4236         if (ai[i+1] > ai[i]) nrows++;
4237       }
4238       len_si[proc] = 2*(nrows+1);
4239       len         += len_si[proc];
4240     }
4241   }
4242 
4243   /* determine the number and length of messages to receive for ij-structure */
4244   /*-------------------------------------------------------------------------*/
4245   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4246   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4247 
4248   /* post the Irecv of j-structure */
4249   /*-------------------------------*/
4250   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4251   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4252 
4253   /* post the Isend of j-structure */
4254   /*--------------------------------*/
4255   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4256 
4257   for (proc=0, k=0; proc<size; proc++) {
4258     if (!len_s[proc]) continue;
4259     i    = owners[proc];
4260     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4261     k++;
4262   }
4263 
4264   /* receives and sends of j-structure are complete */
4265   /*------------------------------------------------*/
4266   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4267   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4268 
4269   /* send and recv i-structure */
4270   /*---------------------------*/
4271   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4272   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4273 
4274   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4275   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4276   for (proc=0,k=0; proc<size; proc++) {
4277     if (!len_s[proc]) continue;
4278     /* form outgoing message for i-structure:
4279          buf_si[0]:                 nrows to be sent
4280                [1:nrows]:           row index (global)
4281                [nrows+1:2*nrows+1]: i-structure index
4282     */
4283     /*-------------------------------------------*/
4284     nrows       = len_si[proc]/2 - 1;
4285     buf_si_i    = buf_si + nrows+1;
4286     buf_si[0]   = nrows;
4287     buf_si_i[0] = 0;
4288     nrows       = 0;
4289     for (i=owners[proc]; i<owners[proc+1]; i++) {
4290       anzi = ai[i+1] - ai[i];
4291       if (anzi) {
4292         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4293         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4294         nrows++;
4295       }
4296     }
4297     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4298     k++;
4299     buf_si += len_si[proc];
4300   }
4301 
4302   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4303   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4304 
4305   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4306   for (i=0; i<merge->nrecv; i++) {
4307     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4308   }
4309 
4310   ierr = PetscFree(len_si);CHKERRQ(ierr);
4311   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4312   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4313   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4314   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4315   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4316   ierr = PetscFree(status);CHKERRQ(ierr);
4317 
4318   /* compute a local seq matrix in each processor */
4319   /*----------------------------------------------*/
4320   /* allocate bi array and free space for accumulating nonzero column info */
4321   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4322   bi[0] = 0;
4323 
4324   /* create and initialize a linked list */
4325   nlnk = N+1;
4326   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4327 
4328   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4329   len  = ai[owners[rank+1]] - ai[owners[rank]];
4330   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4331 
4332   current_space = free_space;
4333 
4334   /* determine symbolic info for each local row */
4335   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4336 
4337   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4342   }
4343 
4344   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4345   len  = 0;
4346   for (i=0; i<m; i++) {
4347     bnzi = 0;
4348     /* add local non-zero cols of this proc's seqmat into lnk */
4349     arow  = owners[rank] + i;
4350     anzi  = ai[arow+1] - ai[arow];
4351     aj    = a->j + ai[arow];
4352     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4353     bnzi += nlnk;
4354     /* add received col data into lnk */
4355     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4356       if (i == *nextrow[k]) { /* i-th row */
4357         anzi  = *(nextai[k]+1) - *nextai[k];
4358         aj    = buf_rj[k] + *nextai[k];
4359         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4360         bnzi += nlnk;
4361         nextrow[k]++; nextai[k]++;
4362       }
4363     }
4364     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4365 
4366     /* if free space is not available, make more free space */
4367     if (current_space->local_remaining<bnzi) {
4368       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4369       nspacedouble++;
4370     }
4371     /* copy data into free space, then initialize lnk */
4372     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4373     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4374 
4375     current_space->array           += bnzi;
4376     current_space->local_used      += bnzi;
4377     current_space->local_remaining -= bnzi;
4378 
4379     bi[i+1] = bi[i] + bnzi;
4380   }
4381 
4382   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4383 
4384   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4385   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4386   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4387 
4388   /* create symbolic parallel matrix B_mpi */
4389   /*---------------------------------------*/
4390   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4391   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4392   if (n==PETSC_DECIDE) {
4393     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4394   } else {
4395     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4396   }
4397   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4398   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4399   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4400   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4401   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4402 
4403   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4404   B_mpi->assembled    = PETSC_FALSE;
4405   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4406   merge->bi           = bi;
4407   merge->bj           = bj;
4408   merge->buf_ri       = buf_ri;
4409   merge->buf_rj       = buf_rj;
4410   merge->coi          = NULL;
4411   merge->coj          = NULL;
4412   merge->owners_co    = NULL;
4413 
4414   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4415 
4416   /* attach the supporting struct to B_mpi for reuse */
4417   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4418   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4419   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4420   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4421   *mpimat = B_mpi;
4422 
4423   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4429 /*@C
4430       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4431                  matrices from each processor
4432 
4433     Collective on MPI_Comm
4434 
4435    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
4438 .    m - number of local rows (or PETSC_DECIDE)
4439 .    n - number of local columns (or PETSC_DECIDE)
4440 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4441 
4442    Output Parameter:
4443 .    mpimat - the parallel matrix generated
4444 
4445     Level: advanced
4446 
4447    Notes:
4448      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
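
     A typical use is sketched below (each process has assembled its own sequential
     matrix seqmat of identical size before the call):
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve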
4451 @*/
4452 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4453 {
4454   PetscErrorCode ierr;
4455   PetscMPIInt    size;
4456 
4457   PetscFunctionBegin;
4458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4459   if (size == 1) {
4460     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4461     if (scall == MAT_INITIAL_MATRIX) {
4462       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4463     } else {
4464       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4465     }
4466     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4467     PetscFunctionReturn(0);
4468   }
4469   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4470   if (scall == MAT_INITIAL_MATRIX) {
4471     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4472   }
4473   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4474   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4475   PetscFunctionReturn(0);
4476 }
4477 
4478 #undef __FUNCT__
4479 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4480 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4484 
4485     Not Collective
4486 
4487    Input Parameters:
4488 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4490 
4491    Output Parameter:
4492 .    A_loc - the local sequential matrix generated
4493 
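   For example (a sketch; with MAT_INITIAL_MATRIX the sequential matrix is eventually freed by the caller with MatDestroy()):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
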
4494     Level: developer
4495 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4497 
4498 @*/
4499 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4500 {
4501   PetscErrorCode ierr;
4502   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4503   Mat_SeqAIJ     *mat,*a,*b;
4504   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4505   MatScalar      *aa,*ba,*cam;
4506   PetscScalar    *ca;
4507   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4508   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4509   PetscBool      match;
4510   MPI_Comm       comm;
4511   PetscMPIInt    size;
4512 
4513   PetscFunctionBegin;
4514   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4515   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4516   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4518   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4519 
4520   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4521   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4522   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4523   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4524   aa = a->a; ba = b->a;
4525   if (scall == MAT_INITIAL_MATRIX) {
4526     if (size == 1) {
4527       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4528       PetscFunctionReturn(0);
4529     }
4530 
4531     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4532     ci[0] = 0;
4533     for (i=0; i<am; i++) {
4534       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4535     }
4536     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4537     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4538     k    = 0;
4539     for (i=0; i<am; i++) {
4540       ncols_o = bi[i+1] - bi[i];
4541       ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: columns to the left of the diagonal block */
4543       for (jo=0; jo<ncols_o; jo++) {
4544         col = cmap[*bj];
4545         if (col >= cstart) break;
4546         cj[k]   = col; bj++;
4547         ca[k++] = *ba++;
4548       }
4549       /* diagonal portion of A */
4550       for (j=0; j<ncols_d; j++) {
4551         cj[k]   = cstart + *aj++;
4552         ca[k++] = *aa++;
4553       }
      /* off-diagonal portion of A: columns to the right of the diagonal block */
4555       for (j=jo; j<ncols_o; j++) {
4556         cj[k]   = cmap[*bj++];
4557         ca[k++] = *ba++;
4558       }
4559     }
4560     /* put together the new matrix */
4561     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4562     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4563     /* Since these are PETSc arrays, change flags to free them as necessary. */
4564     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4565     mat->free_a  = PETSC_TRUE;
4566     mat->free_ij = PETSC_TRUE;
4567     mat->nonew   = 0;
4568   } else if (scall == MAT_REUSE_MATRIX) {
4569     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4570     ci = mat->i; cj = mat->j; cam = mat->a;
4571     for (i=0; i<am; i++) {
      /* off-diagonal portion of A: columns to the left of the diagonal block */
4573       ncols_o = bi[i+1] - bi[i];
4574       for (jo=0; jo<ncols_o; jo++) {
4575         col = cmap[*bj];
4576         if (col >= cstart) break;
4577         *cam++ = *ba++; bj++;
4578       }
4579       /* diagonal portion of A */
4580       ncols_d = ai[i+1] - ai[i];
4581       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A: columns to the right of the diagonal block */
4583       for (j=jo; j<ncols_o; j++) {
4584         *cam++ = *ba++; bj++;
4585       }
4586     }
4587   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4588   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4589   PetscFunctionReturn(0);
4590 }
4591 
4592 #undef __FUNCT__
4593 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4594 /*@C
4595      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4596 
4597     Not Collective
4598 
4599    Input Parameters:
4600 +    A - the matrix
4601 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4602 -    row, col - index sets of rows and columns to extract (or NULL)
4603 
4604    Output Parameter:
4605 .    A_loc - the local sequential matrix generated
4606 
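   For example, to extract all local rows and all locally nonzero columns (a sketch):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
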
4607     Level: developer
4608 
4609 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4610 
4611 @*/
4612 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4613 {
4614   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4615   PetscErrorCode ierr;
4616   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4617   IS             isrowa,iscola;
4618   Mat            *aloc;
4619   PetscBool      match;
4620 
4621   PetscFunctionBegin;
4622   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4623   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4624   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4625   if (!row) {
4626     start = A->rmap->rstart; end = A->rmap->rend;
4627     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4628   } else {
4629     isrowa = *row;
4630   }
4631   if (!col) {
4632     start = A->cmap->rstart;
4633     cmap  = a->garray;
4634     nzA   = a->A->cmap->n;
4635     nzB   = a->B->cmap->n;
4636     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4637     ncols = 0;
4638     for (i=0; i<nzB; i++) {
4639       if (cmap[i] < start) idx[ncols++] = cmap[i];
4640       else break;
4641     }
4642     imark = i;
4643     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4644     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4645     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4646   } else {
4647     iscola = *col;
4648   }
4649   if (scall != MAT_INITIAL_MATRIX) {
4650     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4651     aloc[0] = *A_loc;
4652   }
4653   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4654   *A_loc = aloc[0];
4655   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4656   if (!row) {
4657     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4658   }
4659   if (!col) {
4660     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4661   }
4662   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4663   PetscFunctionReturn(0);
4664 }
4665 
4666 #undef __FUNCT__
4667 #define __FUNCT__ "MatGetBrowsOfAcols"
4668 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4670 
4671     Collective on Mat
4672 
4673    Input Parameters:
4674 +    A,B - the matrices in mpiaij format
4675 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4676 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4677 
4678    Output Parameter:
4679 +    rowb, colb - index sets of rows and columns of B to extract
4680 -    B_seq - the sequential matrix generated
4681 
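   For example (a sketch; the index sets created on the first call are reused on subsequent calls):
.vb
     IS  isrow = NULL,iscol = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&isrow,&iscol,&B_seq);CHKERRQ(ierr);
     /* ... after the values of B change but its nonzero pattern does not ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&isrow,&iscol,&B_seq);CHKERRQ(ierr);
.ve
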
4682     Level: developer
4683 
4684 @*/
4685 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4686 {
4687   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4688   PetscErrorCode ierr;
4689   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4690   IS             isrowb,iscolb;
4691   Mat            *bseq=NULL;
4692 
4693   PetscFunctionBegin;
4694   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4695     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4696   }
4697   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4698 
4699   if (scall == MAT_INITIAL_MATRIX) {
4700     start = A->cmap->rstart;
4701     cmap  = a->garray;
4702     nzA   = a->A->cmap->n;
4703     nzB   = a->B->cmap->n;
4704     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4705     ncols = 0;
4706     for (i=0; i<nzB; i++) {  /* row < local row index */
4707       if (cmap[i] < start) idx[ncols++] = cmap[i];
4708       else break;
4709     }
4710     imark = i;
4711     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4712     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4713     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4714     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4715   } else {
4716     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4717     isrowb  = *rowb; iscolb = *colb;
4718     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4719     bseq[0] = *B_seq;
4720   }
4721   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4722   *B_seq = bseq[0];
4723   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4724   if (!rowb) {
4725     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4726   } else {
4727     *rowb = isrowb;
4728   }
4729   if (!colb) {
4730     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4731   } else {
4732     *colb = iscolb;
4733   }
4734   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4735   PetscFunctionReturn(0);
4736 }
4737 
4738 #undef __FUNCT__
4739 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4740 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A
4743 
4744     Collective on Mat
4745 
4746    Input Parameters:
4747 +    A,B - the matrices in mpiaij format
4748 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4749 
4750    Output Parameter:
4751 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4752 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4753 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4754 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4755 
4756     Level: developer
4757 
4758 */
4759 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4760 {
4761   VecScatter_MPI_General *gen_to,*gen_from;
4762   PetscErrorCode         ierr;
4763   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4764   Mat_SeqAIJ             *b_oth;
4765   VecScatter             ctx =a->Mvctx;
4766   MPI_Comm               comm;
4767   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4768   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4769   PetscScalar            *rvalues,*svalues;
4770   MatScalar              *b_otha,*bufa,*bufA;
4771   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4772   MPI_Request            *rwaits = NULL,*swaits = NULL;
4773   MPI_Status             *sstatus,rstatus;
4774   PetscMPIInt            jj,size;
4775   PetscInt               *cols,sbs,rbs;
4776   PetscScalar            *vals;
4777 
4778   PetscFunctionBegin;
4779   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4780   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4781 
4782   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4783     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4784   }
4785   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4786   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4787 
4788   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4789   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4790   rvalues  = gen_from->values; /* holds the length of receiving row */
4791   svalues  = gen_to->values;   /* holds the length of sending row */
4792   nrecvs   = gen_from->n;
4793   nsends   = gen_to->n;
4794 
4795   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4796   srow    = gen_to->indices;    /* local row index to be sent */
4797   sstarts = gen_to->starts;
4798   sprocs  = gen_to->procs;
4799   sstatus = gen_to->sstatus;
4800   sbs     = gen_to->bs;
4801   rstarts = gen_from->starts;
4802   rprocs  = gen_from->procs;
4803   rbs     = gen_from->bs;
4804 
4805   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4806   if (scall == MAT_INITIAL_MATRIX) {
4807     /* i-array */
4808     /*---------*/
4809     /*  post receives */
4810     for (i=0; i<nrecvs; i++) {
4811       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4812       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4813       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4814     }
4815 
4816     /* pack the outgoing message */
4817     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4818 
4819     sstartsj[0] = 0;
4820     rstartsj[0] = 0;
4821     len         = 0; /* total length of j or a array to be sent */
4822     k           = 0;
4823     for (i=0; i<nsends; i++) {
4824       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4825       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4826       for (j=0; j<nrows; j++) {
4827         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4828         for (l=0; l<sbs; l++) {
4829           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4830 
4831           rowlen[j*sbs+l] = ncols;
4832 
4833           len += ncols;
4834           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4835         }
4836         k++;
4837       }
4838       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4839 
4840       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4841     }
4842     /* recvs and sends of i-array are completed */
4843     i = nrecvs;
4844     while (i--) {
4845       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4846     }
4847     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4848 
4849     /* allocate buffers for sending j and a arrays */
4850     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4851     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4852 
4853     /* create i-array of B_oth */
4854     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4855 
4856     b_othi[0] = 0;
4857     len       = 0; /* total length of j or a array to be received */
4858     k         = 0;
4859     for (i=0; i<nrecvs; i++) {
4860       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4862       for (j=0; j<nrows; j++) {
4863         b_othi[k+1] = b_othi[k] + rowlen[j];
4864         len        += rowlen[j]; k++;
4865       }
4866       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4867     }
4868 
    /* allocate space for the j and a arrays of B_oth */
4870     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4871     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4872 
4873     /* j-array */
4874     /*---------*/
4875     /*  post receives of j-array */
4876     for (i=0; i<nrecvs; i++) {
4877       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4878       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4879     }
4880 
4881     /* pack the outgoing message j-array */
4882     k = 0;
4883     for (i=0; i<nsends; i++) {
4884       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4885       bufJ  = bufj+sstartsj[i];
4886       for (j=0; j<nrows; j++) {
4887         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4888         for (ll=0; ll<sbs; ll++) {
4889           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4890           for (l=0; l<ncols; l++) {
4891             *bufJ++ = cols[l];
4892           }
4893           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4894         }
4895       }
4896       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4897     }
4898 
4899     /* recvs and sends of j-array are completed */
4900     i = nrecvs;
4901     while (i--) {
4902       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4903     }
4904     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4905   } else if (scall == MAT_REUSE_MATRIX) {
4906     sstartsj = *startsj_s;
4907     rstartsj = *startsj_r;
4908     bufa     = *bufa_ptr;
4909     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4910     b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4912 
4913   /* a-array */
4914   /*---------*/
4915   /*  post receives of a-array */
4916   for (i=0; i<nrecvs; i++) {
4917     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4918     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4919   }
4920 
4921   /* pack the outgoing message a-array */
4922   k = 0;
4923   for (i=0; i<nsends; i++) {
4924     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4925     bufA  = bufa+sstartsj[i];
4926     for (j=0; j<nrows; j++) {
4927       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4928       for (ll=0; ll<sbs; ll++) {
4929         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4930         for (l=0; l<ncols; l++) {
4931           *bufA++ = vals[l];
4932         }
4933         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4934       }
4935     }
4936     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4937   }
4938   /* recvs and sends of a-array are completed */
4939   i = nrecvs;
4940   while (i--) {
4941     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4942   }
4943   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4944   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4945 
4946   if (scall == MAT_INITIAL_MATRIX) {
4947     /* put together the new matrix */
4948     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4949 
4950     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4951     /* Since these are PETSc arrays, change flags to free them as necessary. */
4952     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4953     b_oth->free_a  = PETSC_TRUE;
4954     b_oth->free_ij = PETSC_TRUE;
4955     b_oth->nonew   = 0;
4956 
4957     ierr = PetscFree(bufj);CHKERRQ(ierr);
4958     if (!startsj_s || !bufa_ptr) {
4959       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4960       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4961     } else {
4962       *startsj_s = sstartsj;
4963       *startsj_r = rstartsj;
4964       *bufa_ptr  = bufa;
4965     }
4966   }
4967   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4968   PetscFunctionReturn(0);
4969 }
4970 
4971 #undef __FUNCT__
4972 #define __FUNCT__ "MatGetCommunicationStructs"
4973 /*@C
4974   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4975 
4976   Not Collective
4977 
4978   Input Parameters:
4979 . A - The matrix in mpiaij format
4980 
4981   Output Parameter:
4982 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4983 . colmap - A map from global column index to local index into lvec
4984 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4985 
4986   Level: developer
4987 
4988 @*/
4989 #if defined(PETSC_USE_CTABLE)
4990 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4991 #else
4992 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4993 #endif
4994 {
4995   Mat_MPIAIJ *a;
4996 
4997   PetscFunctionBegin;
4998   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4999   PetscValidPointer(lvec, 2);
5000   PetscValidPointer(colmap, 3);
5001   PetscValidPointer(multScatter, 4);
5002   a = (Mat_MPIAIJ*) A->data;
5003   if (lvec) *lvec = a->lvec;
5004   if (colmap) *colmap = a->colmap;
5005   if (multScatter) *multScatter = a->Mvctx;
5006   PetscFunctionReturn(0);
5007 }
5008 
5009 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5010 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5011 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5012 #if defined(PETSC_HAVE_ELEMENTAL)
5013 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5014 #endif
5015 
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5018 /*
    Computes C = A*B as (B'*A')' since computing the MPIDense-times-MPIAIJ product A*B directly is untenable
5020 
5021                n                       p                          p
5022         (              )       (              )         (                  )
5023       m (      A       )  *  n (       B      )   =   m (         C        )
5024         (              )       (              )         (                  )
5025 
5026 */
5027 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5028 {
5029   PetscErrorCode ierr;
5030   Mat            At,Bt,Ct;
5031 
5032   PetscFunctionBegin;
5033   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5034   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5035   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5036   ierr = MatDestroy(&At);CHKERRQ(ierr);
5037   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5038   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5039   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5040   PetscFunctionReturn(0);
5041 }
5042 
5043 #undef __FUNCT__
5044 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5045 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5046 {
5047   PetscErrorCode ierr;
5048   PetscInt       m=A->rmap->n,n=B->cmap->n;
5049   Mat            Cmat;
5050 
5051   PetscFunctionBegin;
5052   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5053   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5054   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5055   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5056   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5057   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5058   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5059   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5060 
5061   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5062 
5063   *C = Cmat;
5064   PetscFunctionReturn(0);
5065 }
5066 
5067 /* ----------------------------------------------------------------*/
5068 #undef __FUNCT__
5069 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5070 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5071 {
5072   PetscErrorCode ierr;
5073 
5074   PetscFunctionBegin;
5075   if (scall == MAT_INITIAL_MATRIX) {
5076     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5077     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5078     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5079   }
5080   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5081   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5082   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5083   PetscFunctionReturn(0);
5084 }
5085 
5086 /*MC
5087    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5088 
5089    Options Database Keys:
5090 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5091 
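   A matrix of this type can also be constructed directly (a sketch mirroring the construction mechanism described for MatCreateAIJ()):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
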
5092   Level: beginner
5093 
5094 .seealso: MatCreateAIJ()
5095 M*/
5096 
5097 #undef __FUNCT__
5098 #define __FUNCT__ "MatCreate_MPIAIJ"
5099 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5100 {
5101   Mat_MPIAIJ     *b;
5102   PetscErrorCode ierr;
5103   PetscMPIInt    size;
5104 
5105   PetscFunctionBegin;
5106   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5107 
5108   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5109   B->data       = (void*)b;
5110   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5111   B->assembled  = PETSC_FALSE;
5112   B->insertmode = NOT_SET_VALUES;
5113   b->size       = size;
5114 
5115   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5116 
5117   /* build cache for off array entries formed */
5118   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5119 
5120   b->donotstash  = PETSC_FALSE;
5121   b->colmap      = 0;
5122   b->garray      = 0;
5123   b->roworiented = PETSC_TRUE;
5124 
5125   /* stuff used for matrix vector multiply */
5126   b->lvec  = NULL;
5127   b->Mvctx = NULL;
5128 
5129   /* stuff for MatGetRow() */
5130   b->rowindices   = 0;
5131   b->rowvalues    = 0;
5132   b->getrowactive = PETSC_FALSE;
5133 
5134   /* flexible pointer used in CUSP/CUSPARSE classes */
5135   b->spptr = NULL;
5136 
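  /* register type-specific implementations; generic Mat routines retrieve them with PetscObjectQueryFunction() */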
5137   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5138   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5139   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5140   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5141   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5142   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5143   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5144   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5145   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5146   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5147   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5148 #if defined(PETSC_HAVE_ELEMENTAL)
5149   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5150 #endif
5151   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5152   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5153   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5154   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5155   PetscFunctionReturn(0);
5156 }
5157 
5158 #undef __FUNCT__
5159 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5160 /*@C
5161      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5162          and "off-diagonal" parts of the matrix in CSR format.
5163 
5164    Collective on MPI_Comm
5165 
5166    Input Parameters:
5167 +  comm - MPI communicator
5168 .  m - number of local rows (cannot be PETSC_DECIDE)
5169 .  n - number of local columns; this value should be the same as the local size used in creating the
5170        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5171        calculated if N is given). For square matrices n is almost always m.
5172 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5173 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5174 .  i - row indices for the "diagonal" portion of the matrix
5175 .  j - column indices for the "diagonal" portion of the matrix
5176 .  a - matrix values for the "diagonal" portion of the matrix
5177 .  oi - row indices for the "off-diagonal" portion of the matrix
5178 .  oj - column indices for the "off-diagonal" portion of the matrix
5179 -  oa - matrix values for the "off-diagonal" portion of the matrix
5180 
5181    Output Parameter:
5182 .   mat - the matrix
5183 
5184    Level: advanced
5185 
5186    Notes:
5187        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5188        must free the arrays once the matrix has been destroyed, and not before.
5189 
5190        The i, j, oi, and oj indices are 0 based.
5191 
5192        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5193 
5194        This sets local rows and cannot be used to set off-processor values.
5195 
5196        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5197        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5198        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5199        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5200        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5201        communication if it is known that only local entries will be set.
5202 
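       An illustrative call (a minimal sketch for rank 0 of a two-process run, each process owning 2 of the 4
       global rows and columns; comm, the sizes, and the values are made up for the example, with j holding
       column indices local to the "diagonal" block and oj holding global column indices):
.vb
       PetscInt    i[]  = {0,1,2}, j[]  = {0,1};      /* "diagonal" block: one nonzero per local row     */
       PetscInt    oi[] = {0,1,2}, oj[] = {2,3};      /* "off-diagonal" block: one nonzero per local row */
       PetscScalar a[]  = {1.0,2.0}, oa[] = {-1.0,-2.0};
       Mat         A;

       MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
       /* the six arrays are not copied and must remain valid until A is destroyed */
.ve
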
5203 .keywords: matrix, aij, compressed row, sparse, parallel
5204 
5205 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5206           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5207 @*/
5208 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5209 {
5210   PetscErrorCode ierr;
5211   Mat_MPIAIJ     *maij;
5212 
5213   PetscFunctionBegin;
5214   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5215   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5216   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5217   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5218   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5219   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5220   maij = (Mat_MPIAIJ*) (*mat)->data;
5221 
5222   (*mat)->preallocated = PETSC_TRUE;
5223 
5224   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5225   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5226 
5227   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5228   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5229 
5230   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5231   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5232   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5233   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5234 
5235   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5236   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5237   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5238   PetscFunctionReturn(0);
5239 }
5240 
5241 /*
5242     Special version of MatSetValues() for MPIAIJ matrices, intended for direct calls from Fortran
5243     (bypasses the usual Fortran stub layer)
5244 */
5244 #include <petsc/private/fortranimpl.h>
5245 
5246 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5247 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5248 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5249 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5250 #endif
5251 
5252 /* Change these macros so they can be used in a void function */
5253 #undef CHKERRQ
5254 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5255 #undef SETERRQ2
5256 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5257 #undef SETERRQ3
5258 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5259 #undef SETERRQ
5260 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5261 
5262 #undef __FUNCT__
5263 #define __FUNCT__ "matsetvaluesmpiaij_"
5264 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5265 {
5266   Mat            mat  = *mmat;
5267   PetscInt       m    = *mm, n = *mn;
5268   InsertMode     addv = *maddv;
5269   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5270   PetscScalar    value;
5271   PetscErrorCode ierr;
5272 
5273   MatCheckPreallocated(mat,1);
5274   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5275 
5276 #if defined(PETSC_USE_DEBUG)
5277   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5278 #endif
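  /*
     Inlined version of MatSetValues() for MPIAIJ: locally owned rows are inserted directly into the
     "diagonal" (aij->A) and "off-diagonal" (aij->B) SeqAIJ blocks, while rows owned by other processes
     are placed in the stash and communicated during assembly.
  */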
5279   {
5280     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5281     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5282     PetscBool roworiented = aij->roworiented;
5283 
5284     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5285     Mat        A                 = aij->A;
5286     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5287     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5288     MatScalar  *aa               = a->a;
5289     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5290     Mat        B                 = aij->B;
5291     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5292     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5293     MatScalar  *ba               = b->a;
5294 
5295     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5296     PetscInt  nonew = a->nonew;
5297     MatScalar *ap1,*ap2;
5298 
5299     PetscFunctionBegin;
5300     for (i=0; i<m; i++) {
5301       if (im[i] < 0) continue;
5302 #if defined(PETSC_USE_DEBUG)
5303       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5304 #endif
5305       if (im[i] >= rstart && im[i] < rend) {
5306         row      = im[i] - rstart;
5307         lastcol1 = -1;
5308         rp1      = aj + ai[row];
5309         ap1      = aa + ai[row];
5310         rmax1    = aimax[row];
5311         nrow1    = ailen[row];
5312         low1     = 0;
5313         high1    = nrow1;
5314         lastcol2 = -1;
5315         rp2      = bj + bi[row];
5316         ap2      = ba + bi[row];
5317         rmax2    = bimax[row];
5318         nrow2    = bilen[row];
5319         low2     = 0;
5320         high2    = nrow2;
5321 
5322         for (j=0; j<n; j++) {
5323           if (roworiented) value = v[i*n+j];
5324           else value = v[i+j*m];
5325           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5326           if (in[j] >= cstart && in[j] < cend) {
5327             col = in[j] - cstart;
5328             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5329           } else if (in[j] < 0) continue;
5330 #if defined(PETSC_USE_DEBUG)
5331           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5332 #endif
5333           else {
5334             if (mat->was_assembled) {
5335               if (!aij->colmap) {
5336                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5337               }
5338 #if defined(PETSC_USE_CTABLE)
5339               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5340               col--;
5341 #else
5342               col = aij->colmap[in[j]] - 1;
5343 #endif
5344               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5345                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5346                 col  =  in[j];
5347                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5348                 B     = aij->B;
5349                 b     = (Mat_SeqAIJ*)B->data;
5350                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5351                 ba    = b->a;   /* re-fetch the values array before computing ap2; MatDisAssemble_MPIAIJ() replaced aij->B */
5352                 rp2   = bj + bi[row];
5353                 ap2   = ba + bi[row];
5354                 rmax2 = bimax[row];
5355                 nrow2 = bilen[row];
5356                 low2  = 0;
5357                 high2 = nrow2;
5358                 bm    = aij->B->rmap->n;
5359               }
5360             } else col = in[j];
5361             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5362           }
5363         }
5364       } else if (!aij->donotstash) {
5365         if (roworiented) {
5366           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5367         } else {
5368           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5369         }
5370       }
5371     }
5372   }
5373   PetscFunctionReturnVoid();
5374 }
5375 
5376