xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c7351cbe61fe344c304f4415680d0c6788bf5ca2)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also automatically
21    switches over to use inodes when enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine number diagonal and off-diagonal counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine number diagonal and off-diagonal counts */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it it is not scalable (each processor
402 has an order N integer array but is fast to acess.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at (row,col) of the diagonal block.

   row, col   - local row and local column in the diagonal block
   value      - the value to store
   addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
   orow, ocol - global row/column, used only in the error message

   The macro relies on these names from the expanding scope: A, a, am, aa, ai, aj, aimax, ailen,
   rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, ii, N, nonew and ignorezeroentries.
   It defines the label a_noinsert, so it can be expanded only once per function.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)                      \
  {                                                                                       \
    /* reuse the previous search window when columns arrive in increasing order */        \
    if (col > lastcol1) high1 = nrow1;                                                    \
    else                low1  = 0;                                                        \
    lastcol1 = col;                                                                       \
    /* bisect until the window is small, then scan it linearly */                         \
    while (high1-low1 > 5) {                                                              \
      t = (low1+high1)/2;                                                                 \
      if (rp1[t] <= col) low1  = t;                                                       \
      else               high1 = t;                                                       \
    }                                                                                     \
    for (_i=low1; _i<high1; _i++) {                                                       \
      if (rp1[_i] > col) break;                                                           \
      if (rp1[_i] == col) {                                                               \
        /* the location already exists: update it in place */                             \
        if (addv == ADD_VALUES) ap1[_i] += value;                                         \
        else                    ap1[_i]  = value;                                         \
        goto a_noinsert;                                                                  \
      }                                                                                   \
    }                                                                                     \
    /* new location: silently skipped for ignored zeros and for nonew == 1 */             \
    if ((value == 0.0 && ignorezeroentries) || nonew == 1) {                              \
      low1  = 0;                                                                          \
      high1 = nrow1;                                                                      \
      goto a_noinsert;                                                                    \
    }                                                                                     \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1 - 1;                                                                        \
    nrow1++;                                                                              \
    a->nz++;                                                                              \
    high1++;                                                                              \
    /* shift up all the later entries in this row */                                      \
    for (ii=N; ii>=_i; ii--) {                                                            \
      rp1[ii+1] = rp1[ii];                                                                \
      ap1[ii+1] = ap1[ii];                                                                \
    }                                                                                     \
    rp1[_i] = col;                                                                        \
    ap1[_i] = value;                                                                      \
    A->nonzerostate++;                                                                    \
    a_noinsert: ;                                                                         \
    ailen[row] = nrow1;                                                                   \
  }
461 
462 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value at (row,col) of the off-diagonal block.

   row, col   - local row and column index as used by the off-diagonal block
   value      - the value to store
   addv       - ADD_VALUES adds to an existing entry, anything else overwrites it
   orow, ocol - global row/column, used only in the error message

   The macro relies on these names from the expanding scope: B, b, bm, ba, bi, bj, bimax, bilen,
   rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, ii, N, nonew and ignorezeroentries.
   It defines the label b_noinsert, so it can be expanded only once per function.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol)                      \
  {                                                                                       \
    /* reuse the previous search window when columns arrive in increasing order */        \
    if (col > lastcol2) high2 = nrow2;                                                    \
    else                low2  = 0;                                                        \
    lastcol2 = col;                                                                       \
    /* bisect until the window is small, then scan it linearly */                         \
    while (high2-low2 > 5) {                                                              \
      t = (low2+high2)/2;                                                                 \
      if (rp2[t] <= col) low2  = t;                                                       \
      else               high2 = t;                                                       \
    }                                                                                     \
    for (_i=low2; _i<high2; _i++) {                                                       \
      if (rp2[_i] > col) break;                                                           \
      if (rp2[_i] == col) {                                                               \
        /* the location already exists: update it in place */                             \
        if (addv == ADD_VALUES) ap2[_i] += value;                                         \
        else                    ap2[_i]  = value;                                         \
        goto b_noinsert;                                                                  \
      }                                                                                   \
    }                                                                                     \
    /* new location: silently skipped for ignored zeros and for nonew == 1 */             \
    if ((value == 0.0 && ignorezeroentries) || nonew == 1) {                              \
      low2  = 0;                                                                          \
      high2 = nrow2;                                                                      \
      goto b_noinsert;                                                                    \
    }                                                                                     \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2 - 1;                                                                        \
    nrow2++;                                                                              \
    b->nz++;                                                                              \
    high2++;                                                                              \
    /* shift up all the later entries in this row */                                      \
    for (ii=N; ii>=_i; ii--) {                                                            \
      rp2[ii+1] = rp2[ii];                                                                \
      ap2[ii+1] = ap2[ii];                                                                \
    }                                                                                     \
    rp2[_i] = col;                                                                        \
    ap2[_i] = value;                                                                      \
    B->nonzerostate++;                                                                    \
    b_noinsert: ;                                                                         \
    bilen[row] = nrow2;                                                                   \
  }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselfs, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
837     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
838   } else if (diag != 0.0) {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
841     for (r = 0; r < len; ++r) {
842       const PetscInt row = lrows[r] + A->rmap->rstart;
843       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
844     }
845     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   } else {
848     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
849   }
850   ierr = PetscFree(lrows);CHKERRQ(ierr);
851 
852   /* only change matrix nonzero state if pattern was allowed to be changed */
853   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
854     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
855     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
865   PetscErrorCode    ierr;
866   PetscMPIInt       n = A->rmap->n;
867   PetscInt          i,j,r,m,p = 0,len = 0;
868   PetscInt          *lrows,*owners = A->rmap->range;
869   PetscSFNode       *rrows;
870   PetscSF           sf;
871   const PetscScalar *xx;
872   PetscScalar       *bb,*mask;
873   Vec               xmask,lmask;
874   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
875   const PetscInt    *aj, *ii,*ridx;
876   PetscScalar       *aa;
877 
878   PetscFunctionBegin;
879   /* Create SF where leaves are input rows and roots are owned rows */
880   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
881   for (r = 0; r < n; ++r) lrows[r] = -1;
882   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
883   for (r = 0; r < N; ++r) {
884     const PetscInt idx   = rows[r];
885     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
886     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
887       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
888     }
889     rrows[r].rank  = p;
890     rrows[r].index = rows[r] - owners[p];
891   }
892   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
893   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
894   /* Collect flags for rows to be zeroed */
895   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
896   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
897   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
898   /* Compress and put in row numbers */
899   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
900   /* zero diagonal part of matrix */
901   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
902   /* handle off diagonal part of matrix */
903   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
904   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
905   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
906   for (i=0; i<len; i++) bb[lrows[i]] = 1;
907   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
908   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
911   if (x) {
912     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
916   }
917   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
918   /* remove zeroed rows of off diagonal matrix */
919   ii = aij->i;
920   for (i=0; i<len; i++) {
921     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
922   }
923   /* loop over all elements of off process part of matrix zeroing removed columns*/
924   if (aij->compressedrow.use) {
925     m    = aij->compressedrow.nrows;
926     ii   = aij->compressedrow.i;
927     ridx = aij->compressedrow.rindex;
928     for (i=0; i<m; i++) {
929       n  = ii[i+1] - ii[i];
930       aj = aij->j + ii[i];
931       aa = aij->a + ii[i];
932 
933       for (j=0; j<n; j++) {
934         if (PetscAbsScalar(mask[*aj])) {
935           if (b) bb[*ridx] -= *aa*xx[*aj];
936           *aa = 0.0;
937         }
938         aa++;
939         aj++;
940       }
941       ridx++;
942     }
943   } else { /* do not use compressed row format */
944     m = l->B->rmap->n;
945     for (i=0; i<m; i++) {
946       n  = ii[i+1] - ii[i];
947       aj = aij->j + ii[i];
948       aa = aij->a + ii[i];
949       for (j=0; j<n; j++) {
950         if (PetscAbsScalar(mask[*aj])) {
951           if (b) bb[i] -= *aa*xx[*aj];
952           *aa = 0.0;
953         }
954         aa++;
955         aj++;
956       }
957     }
958   }
959   if (x) {
960     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
961     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
962   }
963   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
964   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
965   ierr = PetscFree(lrows);CHKERRQ(ierr);
966 
967   /* only change matrix nonzero state if pattern was allowed to be changed */
968   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
969     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
970     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMult_MPIAIJ"
977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscInt       nt;
982 
983   PetscFunctionBegin;
984   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
985   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1008 {
1009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026   PetscBool      merged;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1030   /* do nondiagonal part */
1031   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1032   if (!merged) {
1033     /* send it on its way */
1034     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035     /* do local part */
1036     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1037     /* receive remote parts: note this assumes the values are not actually */
1038     /* added in yy until the next line, */
1039     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1040   } else {
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* values actually were received in the Begin() but we need to call this nop */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1053 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1054 {
1055   MPI_Comm       comm;
1056   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1057   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1058   IS             Me,Notme;
1059   PetscErrorCode ierr;
1060   PetscInt       M,N,first,last,*notme,i;
1061   PetscMPIInt    size;
1062 
1063   PetscFunctionBegin;
1064   /* Easy test: symmetric diagonal block */
1065   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1066   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1067   if (!*f) PetscFunctionReturn(0);
1068   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1070   if (size == 1) PetscFunctionReturn(0);
1071 
1072   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1073   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1074   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1075   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1076   for (i=0; i<first; i++) notme[i] = i;
1077   for (i=last; i<M; i++) notme[i-last+first] = i;
1078   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1079   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1080   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1081   Aoff = Aoffs[0];
1082   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1083   Boff = Boffs[0];
1084   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1085   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1086   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1087   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1088   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1089   ierr = PetscFree(notme);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   /* do nondiagonal part */
1102   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1103   /* send it on its way */
1104   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1105   /* do local part */
1106   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1107   /* receive remote parts */
1108   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1119 {
1120   PetscErrorCode ierr;
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122 
1123   PetscFunctionBegin;
1124   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1125   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPIAIJ"
1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1139   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatDestroy_MPIAIJ"
1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1146 {
1147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151 #if defined(PETSC_USE_LOG)
1152   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1153 #endif
1154   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1156   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1157   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_CTABLE)
1159   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1160 #else
1161   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1162 #endif
1163   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1164   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1165   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1166   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1167   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1168   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1169 
1170   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1191   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1192   PetscErrorCode ierr;
1193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1194   int            fd;
1195   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1196   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1197   PetscScalar    *column_values;
1198   PetscInt       message_count,flowcontrolcount;
1199   FILE           *file;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1203   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1204   nz   = A->nz + B->nz;
1205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     /* get largest number of rows any processor has */
1214     rlen  = mat->rmap->n;
1215     range = mat->rmap->range;
1216     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1217   } else {
1218     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     rlen = mat->rmap->n;
1220   }
1221 
1222   /* load up the local row counts */
1223   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1224   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1225 
1226   /* store the row lengths to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1232       rlen = range[i+1] - range[i];
1233       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1239     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1243 
1244   /* load up the local column indices */
1245   nzmax = nz; /* th processor needs space a largest processor needs */
1246   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1248   cnt   = 0;
1249   for (i=0; i<mat->rmap->n; i++) {
1250     for (j=B->i[i]; j<B->i[i+1]; j++) {
1251       if ((col = garray[B->j[j]]) > cstart) break;
1252       column_indices[cnt++] = col;
1253     }
1254     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1255     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1256   }
1257   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1258 
1259   /* store the column indices to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     MPI_Status status;
1263     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1267       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1268       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     }
1271     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1272   } else {
1273     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1274     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1277   }
1278   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1279 
1280   /* load up the local column values */
1281   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1282   cnt  = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if (garray[B->j[j]] > cstart) break;
1286       column_values[cnt++] = B->a[j];
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1289     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column values to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1302       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_values);CHKERRQ(ierr);
1313 
1314   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1315   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #include <petscdraw.h>
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1323 {
1324   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1325   PetscErrorCode    ierr;
1326   PetscMPIInt       rank = aij->rank,size = aij->size;
1327   PetscBool         isdraw,iascii,isbinary;
1328   PetscViewer       sviewer;
1329   PetscViewerFormat format;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1335   if (iascii) {
1336     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1337     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1338       MatInfo   info;
1339       PetscBool inodes;
1340 
1341       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1342       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1344       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1345       if (!inodes) {
1346         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1347                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1348       } else {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       }
1352       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1354       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1359       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1360       PetscFunctionReturn(0);
1361     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1362       PetscInt inodecount,inodelimit,*inodes;
1363       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1364       if (inodes) {
1365         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1368       }
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1371       PetscFunctionReturn(0);
1372     }
1373   } else if (isbinary) {
1374     if (size == 1) {
1375       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1376       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1379     }
1380     PetscFunctionReturn(0);
1381   } else if (isdraw) {
1382     PetscDraw draw;
1383     PetscBool isnull;
1384     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1385     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1386   }
1387 
1388   {
1389     /* assemble the entire matrix onto first processor. */
1390     Mat        A;
1391     Mat_SeqAIJ *Aloc;
1392     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1393     MatScalar  *a;
1394 
1395     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1396     if (!rank) {
1397       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1398     } else {
1399       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1400     }
1401     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1402     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1403     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1404     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1405     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1406 
1407     /* copy over the A part */
1408     Aloc = (Mat_SeqAIJ*)aij->A->data;
1409     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1410     row  = mat->rmap->rstart;
1411     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1412     for (i=0; i<m; i++) {
1413       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1414       row++;
1415       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1416     }
1417     aj = Aloc->j;
1418     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1419 
1420     /* copy over the B part */
1421     Aloc = (Mat_SeqAIJ*)aij->B->data;
1422     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1423     row  = mat->rmap->rstart;
1424     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1425     ct   = cols;
1426     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1427     for (i=0; i<m; i++) {
1428       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1429       row++;
1430       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1431     }
1432     ierr = PetscFree(ct);CHKERRQ(ierr);
1433     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1434     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     /*
1436        Everyone has to call to draw the matrix since the graphics waits are
1437        synchronized across all processors that share the PetscDraw object
1438     */
1439     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1440     if (!rank) {
1441       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1442       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1443     }
1444     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1445     ierr = MatDestroy(&A);CHKERRQ(ierr);
1446   }
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatView_MPIAIJ"
1452 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1453 {
1454   PetscErrorCode ierr;
1455   PetscBool      iascii,isdraw,issocket,isbinary;
1456 
1457   PetscFunctionBegin;
1458   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1462   if (iascii || isdraw || isbinary || issocket) {
1463     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSOR_MPIAIJ"
1470 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1471 {
1472   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1473   PetscErrorCode ierr;
1474   Vec            bb1 = 0;
1475   PetscBool      hasop;
1476 
1477   PetscFunctionBegin;
1478   if (flag == SOR_APPLY_UPPER) {
1479     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1480     PetscFunctionReturn(0);
1481   }
1482 
1483   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1484     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1485   }
1486 
1487   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1488     if (flag & SOR_ZERO_INITIAL_GUESS) {
1489       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1490       its--;
1491     }
1492 
1493     while (its--) {
1494       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496 
1497       /* update rhs: bb1 = bb - B*x */
1498       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1499       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1500 
1501       /* local sweep */
1502       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1503     }
1504   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1505     if (flag & SOR_ZERO_INITIAL_GUESS) {
1506       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1507       its--;
1508     }
1509     while (its--) {
1510       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512 
1513       /* update rhs: bb1 = bb - B*x */
1514       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1515       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1516 
1517       /* local sweep */
1518       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1519     }
1520   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1521     if (flag & SOR_ZERO_INITIAL_GUESS) {
1522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1523       its--;
1524     }
1525     while (its--) {
1526       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1527       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528 
1529       /* update rhs: bb1 = bb - B*x */
1530       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1531       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1532 
1533       /* local sweep */
1534       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1535     }
1536   } else if (flag & SOR_EISENSTAT) {
1537     Vec xx1;
1538 
1539     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1540     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1541 
1542     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1543     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544     if (!mat->diag) {
1545       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1546       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1547     }
1548     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1549     if (hasop) {
1550       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1551     } else {
1552       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1553     }
1554     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1555 
1556     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1557 
1558     /* local sweep */
1559     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1560     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1561     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1562   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1563 
1564   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatPermute_MPIAIJ"
1570 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1571 {
1572   Mat            aA,aB,Aperm;
1573   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1574   PetscScalar    *aa,*ba;
1575   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1576   PetscSF        rowsf,sf;
1577   IS             parcolp = NULL;
1578   PetscBool      done;
1579   PetscErrorCode ierr;
1580 
1581   PetscFunctionBegin;
1582   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1583   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1584   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1585   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1586 
1587   /* Invert row permutation to find out where my rows should go */
1588   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1589   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1590   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1591   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1592   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1594 
1595   /* Invert column permutation to find out where my columns should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1599   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1600   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1602   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1603 
1604   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1605   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1606   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1607 
1608   /* Find out where my gcols should go */
1609   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1610   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1611   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1612   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1613   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1614   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1615   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1616   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1617 
1618   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1619   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1620   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1621   for (i=0; i<m; i++) {
1622     PetscInt row = rdest[i],rowner;
1623     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1624     for (j=ai[i]; j<ai[i+1]; j++) {
1625       PetscInt cowner,col = cdest[aj[j]];
1626       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1627       if (rowner == cowner) dnnz[i]++;
1628       else onnz[i]++;
1629     }
1630     for (j=bi[i]; j<bi[i+1]; j++) {
1631       PetscInt cowner,col = gcdest[bj[j]];
1632       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1633       if (rowner == cowner) dnnz[i]++;
1634       else onnz[i]++;
1635     }
1636   }
1637   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1638   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1639   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1640   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1641   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1642 
1643   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1644   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1645   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1646   for (i=0; i<m; i++) {
1647     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1648     PetscInt j0,rowlen;
1649     rowlen = ai[i+1] - ai[i];
1650     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1651       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1652       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1653     }
1654     rowlen = bi[i+1] - bi[i];
1655     for (j0=j=0; j<rowlen; j0=j) {
1656       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1657       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1658     }
1659   }
1660   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1661   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1663   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1664   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1665   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1666   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1667   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1668   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1669   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1670   *B = Aperm;
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1676 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1677 {
1678   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1679   Mat            A    = mat->A,B = mat->B;
1680   PetscErrorCode ierr;
1681   PetscReal      isend[5],irecv[5];
1682 
1683   PetscFunctionBegin;
1684   info->block_size = 1.0;
1685   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1686 
1687   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1688   isend[3] = info->memory;  isend[4] = info->mallocs;
1689 
1690   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1691 
1692   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1693   isend[3] += info->memory;  isend[4] += info->mallocs;
1694   if (flag == MAT_LOCAL) {
1695     info->nz_used      = isend[0];
1696     info->nz_allocated = isend[1];
1697     info->nz_unneeded  = isend[2];
1698     info->memory       = isend[3];
1699     info->mallocs      = isend[4];
1700   } else if (flag == MAT_GLOBAL_MAX) {
1701     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1702 
1703     info->nz_used      = irecv[0];
1704     info->nz_allocated = irecv[1];
1705     info->nz_unneeded  = irecv[2];
1706     info->memory       = irecv[3];
1707     info->mallocs      = irecv[4];
1708   } else if (flag == MAT_GLOBAL_SUM) {
1709     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1710 
1711     info->nz_used      = irecv[0];
1712     info->nz_allocated = irecv[1];
1713     info->nz_unneeded  = irecv[2];
1714     info->memory       = irecv[3];
1715     info->mallocs      = irecv[4];
1716   }
1717   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1718   info->fill_ratio_needed = 0;
1719   info->factor_mallocs    = 0;
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatSetOption_MPIAIJ"
1725 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1726 {
1727   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   switch (op) {
1732   case MAT_NEW_NONZERO_LOCATIONS:
1733   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1734   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1735   case MAT_KEEP_NONZERO_PATTERN:
1736   case MAT_NEW_NONZERO_LOCATION_ERR:
1737   case MAT_USE_INODES:
1738   case MAT_IGNORE_ZERO_ENTRIES:
1739     MatCheckPreallocated(A,1);
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_ROW_ORIENTED:
1744     a->roworiented = flg;
1745 
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_NEW_DIAGONALS:
1750     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1751     break;
1752   case MAT_IGNORE_OFF_PROC_ENTRIES:
1753     a->donotstash = flg;
1754     break;
1755   case MAT_SPD:
1756     A->spd_set = PETSC_TRUE;
1757     A->spd     = flg;
1758     if (flg) {
1759       A->symmetric                  = PETSC_TRUE;
1760       A->structurally_symmetric     = PETSC_TRUE;
1761       A->symmetric_set              = PETSC_TRUE;
1762       A->structurally_symmetric_set = PETSC_TRUE;
1763     }
1764     break;
1765   case MAT_SYMMETRIC:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   case MAT_STRUCTURALLY_SYMMETRIC:
1769     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1770     break;
1771   case MAT_HERMITIAN:
1772     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1773     break;
1774   case MAT_SYMMETRY_ETERNAL:
1775     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1776     break;
1777   default:
1778     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1779   }
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatGetRow_MPIAIJ"
1785 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1786 {
1787   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1788   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1789   PetscErrorCode ierr;
1790   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1791   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1792   PetscInt       *cmap,*idx_p;
1793 
1794   PetscFunctionBegin;
1795   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1796   mat->getrowactive = PETSC_TRUE;
1797 
1798   if (!mat->rowvalues && (idx || v)) {
1799     /*
1800         allocate enough space to hold information from the longest row.
1801     */
1802     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1803     PetscInt   max = 1,tmp;
1804     for (i=0; i<matin->rmap->n; i++) {
1805       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1806       if (max < tmp) max = tmp;
1807     }
1808     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1809   }
1810 
1811   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1812   lrow = row - rstart;
1813 
1814   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1815   if (!v)   {pvA = 0; pvB = 0;}
1816   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1817   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1818   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1819   nztot = nzA + nzB;
1820 
1821   cmap = mat->garray;
1822   if (v  || idx) {
1823     if (nztot) {
1824       /* Sort by increasing column numbers, assuming A and B already sorted */
1825       PetscInt imark = -1;
1826       if (v) {
1827         *v = v_p = mat->rowvalues;
1828         for (i=0; i<nzB; i++) {
1829           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1830           else break;
1831         }
1832         imark = i;
1833         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1834         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1835       }
1836       if (idx) {
1837         *idx = idx_p = mat->rowindices;
1838         if (imark > -1) {
1839           for (i=0; i<imark; i++) {
1840             idx_p[i] = cmap[cworkB[i]];
1841           }
1842         } else {
1843           for (i=0; i<nzB; i++) {
1844             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1845             else break;
1846           }
1847           imark = i;
1848         }
1849         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1850         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1851       }
1852     } else {
1853       if (idx) *idx = 0;
1854       if (v)   *v   = 0;
1855     }
1856   }
1857   *nz  = nztot;
1858   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1859   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1865 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1866 {
1867   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1868 
1869   PetscFunctionBegin;
1870   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1871   aij->getrowactive = PETSC_FALSE;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatNorm_MPIAIJ"
1877 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1878 {
1879   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1880   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1881   PetscErrorCode ierr;
1882   PetscInt       i,j,cstart = mat->cmap->rstart;
1883   PetscReal      sum = 0.0;
1884   MatScalar      *v;
1885 
1886   PetscFunctionBegin;
1887   if (aij->size == 1) {
1888     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1889   } else {
1890     if (type == NORM_FROBENIUS) {
1891       v = amat->a;
1892       for (i=0; i<amat->nz; i++) {
1893         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1894       }
1895       v = bmat->a;
1896       for (i=0; i<bmat->nz; i++) {
1897         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1898       }
1899       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1900       *norm = PetscSqrtReal(*norm);
1901     } else if (type == NORM_1) { /* max column norm */
1902       PetscReal *tmp,*tmp2;
1903       PetscInt  *jj,*garray = aij->garray;
1904       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1905       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1906       *norm = 0.0;
1907       v     = amat->a; jj = amat->j;
1908       for (j=0; j<amat->nz; j++) {
1909         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1910       }
1911       v = bmat->a; jj = bmat->j;
1912       for (j=0; j<bmat->nz; j++) {
1913         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1914       }
1915       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1916       for (j=0; j<mat->cmap->N; j++) {
1917         if (tmp2[j] > *norm) *norm = tmp2[j];
1918       }
1919       ierr = PetscFree(tmp);CHKERRQ(ierr);
1920       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1921     } else if (type == NORM_INFINITY) { /* max row norm */
1922       PetscReal ntemp = 0.0;
1923       for (j=0; j<aij->A->rmap->n; j++) {
1924         v   = amat->a + amat->i[j];
1925         sum = 0.0;
1926         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1927           sum += PetscAbsScalar(*v); v++;
1928         }
1929         v = bmat->a + bmat->i[j];
1930         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1931           sum += PetscAbsScalar(*v); v++;
1932         }
1933         if (sum > ntemp) ntemp = sum;
1934       }
1935       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1937   }
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatTranspose_MPIAIJ"
1943 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1944 {
1945   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1946   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1947   PetscErrorCode ierr;
1948   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1949   PetscInt       cstart = A->cmap->rstart,ncol;
1950   Mat            B;
1951   MatScalar      *array;
1952 
1953   PetscFunctionBegin;
1954   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1955 
1956   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1957   ai = Aloc->i; aj = Aloc->j;
1958   bi = Bloc->i; bj = Bloc->j;
1959   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1960     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1961     PetscSFNode          *oloc;
1962     PETSC_UNUSED PetscSF sf;
1963 
1964     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1965     /* compute d_nnz for preallocation */
1966     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1967     for (i=0; i<ai[ma]; i++) {
1968       d_nnz[aj[i]]++;
1969       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1970     }
1971     /* compute local off-diagonal contributions */
1972     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1973     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1974     /* map those to global */
1975     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1976     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1977     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1978     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1979     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1980     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1981     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1982 
1983     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1984     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1985     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1986     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1987     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1988     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1989   } else {
1990     B    = *matout;
1991     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1992     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1993   }
1994 
1995   /* copy over the A part */
1996   array = Aloc->a;
1997   row   = A->rmap->rstart;
1998   for (i=0; i<ma; i++) {
1999     ncol = ai[i+1]-ai[i];
2000     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2001     row++;
2002     array += ncol; aj += ncol;
2003   }
2004   aj = Aloc->j;
2005   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2006 
2007   /* copy over the B part */
2008   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2009   array = Bloc->a;
2010   row   = A->rmap->rstart;
2011   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2012   cols_tmp = cols;
2013   for (i=0; i<mb; i++) {
2014     ncol = bi[i+1]-bi[i];
2015     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2016     row++;
2017     array += ncol; cols_tmp += ncol;
2018   }
2019   ierr = PetscFree(cols);CHKERRQ(ierr);
2020 
2021   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2022   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2024     *matout = B;
2025   } else {
2026     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2033 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2034 {
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            a    = aij->A,b = aij->B;
2037   PetscErrorCode ierr;
2038   PetscInt       s1,s2,s3;
2039 
2040   PetscFunctionBegin;
2041   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2042   if (rr) {
2043     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2044     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2045     /* Overlap communication with computation. */
2046     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2047   }
2048   if (ll) {
2049     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2050     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2051     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2052   }
2053   /* scale  the diagonal block */
2054   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2055 
2056   if (rr) {
2057     /* Do a scatter end and then right scale the off-diagonal block */
2058     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2059     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2066 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2067 {
2068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2069   PetscErrorCode ierr;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatEqual_MPIAIJ"
2078 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2079 {
2080   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2081   Mat            a,b,c,d;
2082   PetscBool      flg;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   a = matA->A; b = matA->B;
2087   c = matB->A; d = matB->B;
2088 
2089   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2090   if (flg) {
2091     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2092   }
2093   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatCopy_MPIAIJ"
2099 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2100 {
2101   PetscErrorCode ierr;
2102   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2103   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2104 
2105   PetscFunctionBegin;
2106   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2107   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2108     /* because of the column compression in the off-processor part of the matrix a->B,
2109        the number of columns in a->B and b->B may be different, hence we cannot call
2110        the MatCopy() directly on the two parts. If need be, we can provide a more
2111        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2112        then copying the submatrices */
2113     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2114   } else {
2115     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2116     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2117   }
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatSetUp_MPIAIJ"
2123 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2124 {
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 /*
2133    Computes the number of nonzeros per row needed for preallocation when X and Y
2134    have different nonzero structure.
2135 */
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2138 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2139 {
2140   PetscInt       i,j,k,nzx,nzy;
2141 
2142   PetscFunctionBegin;
2143   /* Set the number of nonzeros in the new matrix */
2144   for (i=0; i<m; i++) {
2145     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2146     nzx = xi[i+1] - xi[i];
2147     nzy = yi[i+1] - yi[i];
2148     nnz[i] = 0;
2149     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2150       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2151       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2152       nnz[i]++;
2153     }
2154     for (; k<nzy; k++) nnz[i]++;
2155   }
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2162 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2163 {
2164   PetscErrorCode ierr;
2165   PetscInt       m = Y->rmap->N;
2166   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2167   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2168 
2169   PetscFunctionBegin;
2170   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatAXPY_MPIAIJ"
2176 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2177 {
2178   PetscErrorCode ierr;
2179   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2180   PetscBLASInt   bnz,one=1;
2181   Mat_SeqAIJ     *x,*y;
2182 
2183   PetscFunctionBegin;
2184   if (str == SAME_NONZERO_PATTERN) {
2185     PetscScalar alpha = a;
2186     x    = (Mat_SeqAIJ*)xx->A->data;
2187     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2188     y    = (Mat_SeqAIJ*)yy->A->data;
2189     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2190     x    = (Mat_SeqAIJ*)xx->B->data;
2191     y    = (Mat_SeqAIJ*)yy->B->data;
2192     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2193     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2194     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2195   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2196     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2197   } else {
2198     Mat      B;
2199     PetscInt *nnz_d,*nnz_o;
2200     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2201     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2202     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2203     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2204     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2205     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2206     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2207     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2208     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2209     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2210     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2211     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2212     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2213     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2214   }
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatConjugate_MPIAIJ"
2222 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2223 {
2224 #if defined(PETSC_USE_COMPLEX)
2225   PetscErrorCode ierr;
2226   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2227 
2228   PetscFunctionBegin;
2229   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2230   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2231 #else
2232   PetscFunctionBegin;
2233 #endif
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatRealPart_MPIAIJ"
2239 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2240 {
2241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2242   PetscErrorCode ierr;
2243 
2244   PetscFunctionBegin;
2245   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2246   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2252 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2253 {
2254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2255   PetscErrorCode ierr;
2256 
2257   PetscFunctionBegin;
2258   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2259   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #if defined(PETSC_HAVE_PBGL)
2264 
2265 #include <boost/parallel/mpi/bsp_process_group.hpp>
2266 #include <boost/graph/distributed/ilu_default_graph.hpp>
2267 #include <boost/graph/distributed/ilu_0_block.hpp>
2268 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2269 #include <boost/graph/distributed/petsc/interface.hpp>
2270 #include <boost/multi_array.hpp>
2271 #include <boost/parallel/distributed_property_map->hpp>
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2275 /*
2276   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2277 */
2278 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2279 {
2280   namespace petsc = boost::distributed::petsc;
2281 
2282   namespace graph_dist = boost::graph::distributed;
2283   using boost::graph::distributed::ilu_default::process_group_type;
2284   using boost::graph::ilu_permuted;
2285 
2286   PetscBool      row_identity, col_identity;
2287   PetscContainer c;
2288   PetscInt       m, n, M, N;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2293   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2294   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2295   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2296 
2297   process_group_type pg;
2298   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2299   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2300   lgraph_type& level_graph = *lgraph_p;
2301   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2302 
2303   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2304   ilu_permuted(level_graph);
2305 
2306   /* put together the new matrix */
2307   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2308   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2309   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2310   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2311   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2312   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2313   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2314   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2315 
2316   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2317   ierr = PetscContainerSetPointer(c, lgraph_p);
2318   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2319   ierr = PetscContainerDestroy(&c);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2325 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2326 {
2327   PetscFunctionBegin;
2328   PetscFunctionReturn(0);
2329 }
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatSolve_MPIAIJ"
2333 /*
2334   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2335 */
2336 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2337 {
2338   namespace graph_dist = boost::graph::distributed;
2339 
2340   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2341   lgraph_type    *lgraph_p;
2342   PetscContainer c;
2343   PetscErrorCode ierr;
2344 
2345   PetscFunctionBegin;
2346   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2347   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2348   ierr = VecCopy(b, x);CHKERRQ(ierr);
2349 
2350   PetscScalar *array_x;
2351   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2352   PetscInt sx;
2353   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2354 
2355   PetscScalar *array_b;
2356   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2357   PetscInt sb;
2358   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2359 
2360   lgraph_type& level_graph = *lgraph_p;
2361   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2362 
2363   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2364   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2365   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2366 
2367   typedef boost::iterator_property_map<array_ref_type::iterator,
2368                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2369   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2370   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2371 
2372   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2373   PetscFunctionReturn(0);
2374 }
2375 #endif
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2379 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2380 {
2381   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2382   PetscErrorCode ierr;
2383   PetscInt       i,*idxb = 0;
2384   PetscScalar    *va,*vb;
2385   Vec            vtmp;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2389   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2390   if (idx) {
2391     for (i=0; i<A->rmap->n; i++) {
2392       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2393     }
2394   }
2395 
2396   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2397   if (idx) {
2398     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2399   }
2400   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2401   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2402 
2403   for (i=0; i<A->rmap->n; i++) {
2404     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2405       va[i] = vb[i];
2406       if (idx) idx[i] = a->garray[idxb[i]];
2407     }
2408   }
2409 
2410   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2411   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2412   ierr = PetscFree(idxb);CHKERRQ(ierr);
2413   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2419 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2420 {
2421   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2422   PetscErrorCode ierr;
2423   PetscInt       i,*idxb = 0;
2424   PetscScalar    *va,*vb;
2425   Vec            vtmp;
2426 
2427   PetscFunctionBegin;
2428   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2429   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2430   if (idx) {
2431     for (i=0; i<A->cmap->n; i++) {
2432       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2433     }
2434   }
2435 
2436   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2437   if (idx) {
2438     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2439   }
2440   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2441   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2442 
2443   for (i=0; i<A->rmap->n; i++) {
2444     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2445       va[i] = vb[i];
2446       if (idx) idx[i] = a->garray[idxb[i]];
2447     }
2448   }
2449 
2450   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2451   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2452   ierr = PetscFree(idxb);CHKERRQ(ierr);
2453   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2459 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2460 {
2461   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2462   PetscInt       n      = A->rmap->n;
2463   PetscInt       cstart = A->cmap->rstart;
2464   PetscInt       *cmap  = mat->garray;
2465   PetscInt       *diagIdx, *offdiagIdx;
2466   Vec            diagV, offdiagV;
2467   PetscScalar    *a, *diagA, *offdiagA;
2468   PetscInt       r;
2469   PetscErrorCode ierr;
2470 
2471   PetscFunctionBegin;
2472   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2473   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2474   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2475   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2476   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2477   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2478   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2479   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2480   for (r = 0; r < n; ++r) {
2481     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2482       a[r]   = diagA[r];
2483       idx[r] = cstart + diagIdx[r];
2484     } else {
2485       a[r]   = offdiagA[r];
2486       idx[r] = cmap[offdiagIdx[r]];
2487     }
2488   }
2489   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2490   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2491   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2492   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2493   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2494   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2500 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2501 {
2502   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2503   PetscInt       n      = A->rmap->n;
2504   PetscInt       cstart = A->cmap->rstart;
2505   PetscInt       *cmap  = mat->garray;
2506   PetscInt       *diagIdx, *offdiagIdx;
2507   Vec            diagV, offdiagV;
2508   PetscScalar    *a, *diagA, *offdiagA;
2509   PetscInt       r;
2510   PetscErrorCode ierr;
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2514   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2515   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2516   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2517   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2518   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2519   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2520   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2521   for (r = 0; r < n; ++r) {
2522     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2523       a[r]   = diagA[r];
2524       idx[r] = cstart + diagIdx[r];
2525     } else {
2526       a[r]   = offdiagA[r];
2527       idx[r] = cmap[offdiagIdx[r]];
2528     }
2529   }
2530   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2531   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2532   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2533   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2534   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2535   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2541 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2542 {
2543   PetscErrorCode ierr;
2544   Mat            *dummy;
2545 
2546   PetscFunctionBegin;
2547   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2548   *newmat = *dummy;
2549   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2555 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2556 {
2557   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2558   PetscErrorCode ierr;
2559 
2560   PetscFunctionBegin;
2561   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2567 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2568 {
2569   PetscErrorCode ierr;
2570   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2571 
2572   PetscFunctionBegin;
2573   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2574   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2575   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2576   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 #undef __FUNCT__
2581 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2582 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2583 {
2584   PetscFunctionBegin;
2585   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2586   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2592 /*@
2593    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2594 
2595    Collective on Mat
2596 
2597    Input Parameters:
2598 +    A - the matrix
2599 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2600 
2601 @*/
2602 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2603 {
2604   PetscErrorCode       ierr;
2605 
2606   PetscFunctionBegin;
2607   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2613 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptions *PetscOptionsObject,Mat A)
2614 {
2615   PetscErrorCode       ierr;
2616   PetscBool            sc = PETSC_FALSE,flg;
2617 
2618   PetscFunctionBegin;
2619   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2620   ierr = PetscObjectOptionsBegin((PetscObject)A);
2621     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2622     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2623     if (flg) {
2624       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2625     }
2626   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatShift_MPIAIJ"
2632 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2633 {
2634   PetscErrorCode ierr;
2635   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2636   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data,*bij = (Mat_SeqAIJ*)maij->B->data;
2637 
2638   PetscFunctionBegin;
2639   if (!aij->nz && !bij->nz) {
2640     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2641   }
2642   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - positional initializer of a struct _MatOps with the MPIAIJ routines.

   The entries are listed in slot order; the numeric comments in the left margin give the
   slot index of the entry they precede. A literal 0 leaves the slot as a null pointer.
   Because the initializer is positional, inserting or removing a line shifts every entry
   after it.

   NOTE(review): the code that installs this table into a Mat is outside this view.
*/
2647 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2648                                        MatGetRow_MPIAIJ,
2649                                        MatRestoreRow_MPIAIJ,
2650                                        MatMult_MPIAIJ,
2651                                 /* 4*/ MatMultAdd_MPIAIJ,
2652                                        MatMultTranspose_MPIAIJ,
2653                                        MatMultTransposeAdd_MPIAIJ,
2654 #if defined(PETSC_HAVE_PBGL) /* slot 7: MatSolve_MPIAIJ is only defined in PBGL builds */
2655                                        MatSolve_MPIAIJ,
2656 #else
2657                                        0,
2658 #endif
2659                                        0,
2660                                        0,
2661                                 /*10*/ 0,
2662                                        0,
2663                                        0,
2664                                        MatSOR_MPIAIJ,
2665                                        MatTranspose_MPIAIJ,
2666                                 /*15*/ MatGetInfo_MPIAIJ,
2667                                        MatEqual_MPIAIJ,
2668                                        MatGetDiagonal_MPIAIJ,
2669                                        MatDiagonalScale_MPIAIJ,
2670                                        MatNorm_MPIAIJ,
2671                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2672                                        MatAssemblyEnd_MPIAIJ,
2673                                        MatSetOption_MPIAIJ,
2674                                        MatZeroEntries_MPIAIJ,
2675                                 /*24*/ MatZeroRows_MPIAIJ,
2676                                        0,
2677 #if defined(PETSC_HAVE_PBGL) /* NOTE(review): both branches are 0, the conditional has no effect */
2678                                        0,
2679 #else
2680                                        0,
2681 #endif
2682                                        0,
2683                                        0,
2684                                 /*29*/ MatSetUp_MPIAIJ,
2685 #if defined(PETSC_HAVE_PBGL) /* NOTE(review): both branches are 0, the conditional has no effect */
2686                                        0,
2687 #else
2688                                        0,
2689 #endif
2690                                        0,
2691                                        0,
2692                                        0,
2693                                 /*34*/ MatDuplicate_MPIAIJ,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                 /*39*/ MatAXPY_MPIAIJ,
2699                                        MatGetSubMatrices_MPIAIJ,
2700                                        MatIncreaseOverlap_MPIAIJ,
2701                                        MatGetValues_MPIAIJ,
2702                                        MatCopy_MPIAIJ,
2703                                 /*44*/ MatGetRowMax_MPIAIJ,
2704                                        MatScale_MPIAIJ,
2705                                        MatShift_MPIAIJ,
2706                                        MatDiagonalSet_MPIAIJ,
2707                                        MatZeroRowsColumns_MPIAIJ,
2708                                 /*49*/ MatSetRandom_MPIAIJ,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2714                                        0,
2715                                        MatSetUnfactored_MPIAIJ,
2716                                        MatPermute_MPIAIJ,
2717                                        0,
2718                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2719                                        MatDestroy_MPIAIJ,
2720                                        MatView_MPIAIJ,
2721                                        0,
2722                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2723                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2724                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2729                                        MatGetRowMinAbs_MPIAIJ,
2730                                        0,
2731                                        MatSetColoring_MPIAIJ,
2732                                        0,
2733                                        MatSetValuesAdifor_MPIAIJ,
2734                                 /*75*/ MatFDColoringApply_AIJ,
2735                                        MatSetFromOptions_MPIAIJ,
2736                                        0,
2737                                        0,
2738                                        MatFindZeroDiagonals_MPIAIJ,
2739                                 /*80*/ 0,
2740                                        0,
2741                                        0,
2742                                 /*83*/ MatLoad_MPIAIJ,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2749                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2750                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2751                                        MatPtAP_MPIAIJ_MPIAIJ,
2752                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2753                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2754                                        0,
2755                                        0,
2756                                        0,
2757                                        0,
2758                                 /*99*/ 0,
2759                                        0,
2760                                        0,
2761                                        MatConjugate_MPIAIJ,
2762                                        0,
2763                                 /*104*/MatSetValuesRow_MPIAIJ,
2764                                        MatRealPart_MPIAIJ,
2765                                        MatImaginaryPart_MPIAIJ,
2766                                        0,
2767                                        0,
2768                                 /*109*/0,
2769                                        0,
2770                                        MatGetRowMin_MPIAIJ,
2771                                        0,
2772                                        0,
2773                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2774                                        0,
2775                                        0,
2776                                        0,
2777                                        0,
2778                                 /*119*/0,
2779                                        0,
2780                                        0,
2781                                        0,
2782                                        MatGetMultiProcBlock_MPIAIJ,
2783                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2784                                        MatGetColumnNorms_MPIAIJ,
2785                                        MatInvertBlockDiagonal_MPIAIJ,
2786                                        0,
2787                                        MatGetSubMatricesMPI_MPIAIJ,
2788                                 /*129*/0,
2789                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2790                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2791                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2792                                        0,
2793                                 /*134*/0,
2794                                        0,
2795                                        0,
2796                                        0,
2797                                        0,
2798                                 /*139*/0,
2799                                        0,
2800                                        0,
2801                                        MatFDColoringSetUp_MPIXAIJ,
2802                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2803                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2804 };
2805 
2806 /* ----------------------------------------------------------------------------------------*/
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2810 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2811 {
2812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2817   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 #undef __FUNCT__
2822 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2823 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2824 {
2825   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2826   PetscErrorCode ierr;
2827 
2828   PetscFunctionBegin;
2829   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2830   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2836 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2837 {
2838   Mat_MPIAIJ     *b;
2839   PetscErrorCode ierr;
2840 
2841   PetscFunctionBegin;
2842   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2843   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2844   b = (Mat_MPIAIJ*)B->data;
2845 
2846   if (!B->preallocated) {
2847     /* Explicitly create 2 MATSEQAIJ matrices. */
2848     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2849     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2850     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2851     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2852     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2853     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2854     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2855     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2856     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2857     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2858   }
2859 
2860   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2861   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2862   B->preallocated = PETSC_TRUE;
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2868 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2869 {
2870   Mat            mat;
2871   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2872   PetscErrorCode ierr;
2873 
2874   PetscFunctionBegin;
2875   *newmat = 0;
2876   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2877   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2878   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2879   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2880   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2881   a       = (Mat_MPIAIJ*)mat->data;
2882 
2883   mat->factortype   = matin->factortype;
2884   mat->assembled    = PETSC_TRUE;
2885   mat->insertmode   = NOT_SET_VALUES;
2886   mat->preallocated = PETSC_TRUE;
2887 
2888   a->size         = oldmat->size;
2889   a->rank         = oldmat->rank;
2890   a->donotstash   = oldmat->donotstash;
2891   a->roworiented  = oldmat->roworiented;
2892   a->rowindices   = 0;
2893   a->rowvalues    = 0;
2894   a->getrowactive = PETSC_FALSE;
2895 
2896   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2897   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2898 
2899   if (oldmat->colmap) {
2900 #if defined(PETSC_USE_CTABLE)
2901     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2902 #else
2903     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2904     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2905     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2906 #endif
2907   } else a->colmap = 0;
2908   if (oldmat->garray) {
2909     PetscInt len;
2910     len  = oldmat->B->cmap->n;
2911     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2912     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2913     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2914   } else a->garray = 0;
2915 
2916   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2917   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2918   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2919   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2920   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2921   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2922   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2923   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2924   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2925   *newmat = mat;
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 
2930 
2931 #undef __FUNCT__
2932 #define __FUNCT__ "MatLoad_MPIAIJ"
2933 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2934 {
2935   PetscScalar    *vals,*svals;
2936   MPI_Comm       comm;
2937   PetscErrorCode ierr;
2938   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2939   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2940   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2941   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2942   PetscInt       cend,cstart,n,*rowners;
2943   int            fd;
2944   PetscInt       bs = newMat->rmap->bs;
2945 
2946   PetscFunctionBegin;
2947   /* force binary viewer to load .info file if it has not yet done so */
2948   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2949   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2950   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2951   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2952   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2953   if (!rank) {
2954     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2955     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2956   }
2957 
2958   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2959   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2960   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2961   if (bs < 0) bs = 1;
2962 
2963   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2964   M    = header[1]; N = header[2];
2965 
2966   /* If global sizes are set, check if they are consistent with that given in the file */
2967   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2968   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2969 
2970   /* determine ownership of all (block) rows */
2971   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2972   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2973   else m = newMat->rmap->n; /* Set by user */
2974 
2975   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2976   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2977 
2978   /* First process needs enough room for process with most rows */
2979   if (!rank) {
2980     mmax = rowners[1];
2981     for (i=2; i<=size; i++) {
2982       mmax = PetscMax(mmax, rowners[i]);
2983     }
2984   } else mmax = -1;             /* unused, but compilers complain */
2985 
2986   rowners[0] = 0;
2987   for (i=2; i<=size; i++) {
2988     rowners[i] += rowners[i-1];
2989   }
2990   rstart = rowners[rank];
2991   rend   = rowners[rank+1];
2992 
2993   /* distribute row lengths to all processors */
2994   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2995   if (!rank) {
2996     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2997     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2998     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2999     for (j=0; j<m; j++) {
3000       procsnz[0] += ourlens[j];
3001     }
3002     for (i=1; i<size; i++) {
3003       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3004       /* calculate the number of nonzeros on each processor */
3005       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3006         procsnz[i] += rowlengths[j];
3007       }
3008       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3009     }
3010     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3011   } else {
3012     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3013   }
3014 
3015   if (!rank) {
3016     /* determine max buffer needed and allocate it */
3017     maxnz = 0;
3018     for (i=0; i<size; i++) {
3019       maxnz = PetscMax(maxnz,procsnz[i]);
3020     }
3021     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3022 
3023     /* read in my part of the matrix column indices  */
3024     nz   = procsnz[0];
3025     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3026     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3027 
3028     /* read in every one elses and ship off */
3029     for (i=1; i<size; i++) {
3030       nz   = procsnz[i];
3031       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3032       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3033     }
3034     ierr = PetscFree(cols);CHKERRQ(ierr);
3035   } else {
3036     /* determine buffer space needed for message */
3037     nz = 0;
3038     for (i=0; i<m; i++) {
3039       nz += ourlens[i];
3040     }
3041     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3042 
3043     /* receive message of column indices*/
3044     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3045   }
3046 
3047   /* determine column ownership if matrix is not square */
3048   if (N != M) {
3049     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3050     else n = newMat->cmap->n;
3051     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3052     cstart = cend - n;
3053   } else {
3054     cstart = rstart;
3055     cend   = rend;
3056     n      = cend - cstart;
3057   }
3058 
3059   /* loop over local rows, determining number of off diagonal entries */
3060   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3061   jj   = 0;
3062   for (i=0; i<m; i++) {
3063     for (j=0; j<ourlens[i]; j++) {
3064       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3065       jj++;
3066     }
3067   }
3068 
3069   for (i=0; i<m; i++) {
3070     ourlens[i] -= offlens[i];
3071   }
3072   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3073 
3074   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3075 
3076   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3077 
3078   for (i=0; i<m; i++) {
3079     ourlens[i] += offlens[i];
3080   }
3081 
3082   if (!rank) {
3083     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3084 
3085     /* read in my part of the matrix numerical values  */
3086     nz   = procsnz[0];
3087     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3088 
3089     /* insert into matrix */
3090     jj      = rstart;
3091     smycols = mycols;
3092     svals   = vals;
3093     for (i=0; i<m; i++) {
3094       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3095       smycols += ourlens[i];
3096       svals   += ourlens[i];
3097       jj++;
3098     }
3099 
3100     /* read in other processors and ship out */
3101     for (i=1; i<size; i++) {
3102       nz   = procsnz[i];
3103       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3104       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3105     }
3106     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3107   } else {
3108     /* receive numeric values */
3109     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3110 
3111     /* receive message of values*/
3112     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3113 
3114     /* insert into matrix */
3115     jj      = rstart;
3116     smycols = mycols;
3117     svals   = vals;
3118     for (i=0; i<m; i++) {
3119       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3120       smycols += ourlens[i];
3121       svals   += ourlens[i];
3122       jj++;
3123     }
3124   }
3125   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3126   ierr = PetscFree(vals);CHKERRQ(ierr);
3127   ierr = PetscFree(mycols);CHKERRQ(ierr);
3128   ierr = PetscFree(rowners);CHKERRQ(ierr);
3129   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3130   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 #undef __FUNCT__
3135 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3136 /* TODO: Not scalable because of ISAllGather(). */
3137 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3138 {
3139   PetscErrorCode ierr;
3140   IS             iscol_local;
3141   PetscInt       csize;
3142 
3143   PetscFunctionBegin;
3144   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3145   if (call == MAT_REUSE_MATRIX) {
3146     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3147     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3148   } else {
3149     PetscInt cbs;
3150     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3151     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3152     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3153   }
3154   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3155   if (call == MAT_INITIAL_MATRIX) {
3156     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3157     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3158   }
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3165 /*
3166     Not great since it makes two copies of the submatrix, first an SeqAIJ
3167   in local and then by concatenating the local matrices the end result.
3168   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3169 
3170   Note: This requires a sequential iscol with all indices.
3171 */
3172 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3173 {
3174   PetscErrorCode ierr;
3175   PetscMPIInt    rank,size;
3176   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3177   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3178   PetscBool      allcolumns, colflag;
3179   Mat            M,Mreuse;
3180   MatScalar      *vwork,*aa;
3181   MPI_Comm       comm;
3182   Mat_SeqAIJ     *aij;
3183 
3184   PetscFunctionBegin;
3185   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3186   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3187   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3188 
3189   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3190   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3191   if (colflag && ncol == mat->cmap->N) {
3192     allcolumns = PETSC_TRUE;
3193   } else {
3194     allcolumns = PETSC_FALSE;
3195   }
3196   if (call ==  MAT_REUSE_MATRIX) {
3197     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3198     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3199     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3200   } else {
3201     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3202   }
3203 
3204   /*
3205       m - number of local rows
3206       n - number of columns (same on all processors)
3207       rstart - first row in new global matrix generated
3208   */
3209   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3210   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3211   if (call == MAT_INITIAL_MATRIX) {
3212     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3213     ii  = aij->i;
3214     jj  = aij->j;
3215 
3216     /*
3217         Determine the number of non-zeros in the diagonal and off-diagonal
3218         portions of the matrix in order to do correct preallocation
3219     */
3220 
3221     /* first get start and end of "diagonal" columns */
3222     if (csize == PETSC_DECIDE) {
3223       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3224       if (mglobal == n) { /* square matrix */
3225         nlocal = m;
3226       } else {
3227         nlocal = n/size + ((n % size) > rank);
3228       }
3229     } else {
3230       nlocal = csize;
3231     }
3232     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3233     rstart = rend - nlocal;
3234     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3235 
3236     /* next, compute all the lengths */
3237     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3238     olens = dlens + m;
3239     for (i=0; i<m; i++) {
3240       jend = ii[i+1] - ii[i];
3241       olen = 0;
3242       dlen = 0;
3243       for (j=0; j<jend; j++) {
3244         if (*jj < rstart || *jj >= rend) olen++;
3245         else dlen++;
3246         jj++;
3247       }
3248       olens[i] = olen;
3249       dlens[i] = dlen;
3250     }
3251     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3252     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3253     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3254     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3255     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3256     ierr = PetscFree(dlens);CHKERRQ(ierr);
3257   } else {
3258     PetscInt ml,nl;
3259 
3260     M    = *newmat;
3261     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3262     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3263     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3264     /*
3265          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3266        rather than the slower MatSetValues().
3267     */
3268     M->was_assembled = PETSC_TRUE;
3269     M->assembled     = PETSC_FALSE;
3270   }
3271   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3272   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3273   ii   = aij->i;
3274   jj   = aij->j;
3275   aa   = aij->a;
3276   for (i=0; i<m; i++) {
3277     row   = rstart + i;
3278     nz    = ii[i+1] - ii[i];
3279     cwork = jj;     jj += nz;
3280     vwork = aa;     aa += nz;
3281     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3282   }
3283 
3284   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286   *newmat = M;
3287 
3288   /* save submatrix used in processor for next request */
3289   if (call ==  MAT_INITIAL_MATRIX) {
3290     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3291     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3292   }
3293   PetscFunctionReturn(0);
3294 }
3295 
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3298 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3299 {
3300   PetscInt       m,cstart, cend,j,nnz,i,d;
3301   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3302   const PetscInt *JJ;
3303   PetscScalar    *values;
3304   PetscErrorCode ierr;
3305 
3306   PetscFunctionBegin;
3307   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3308 
3309   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3310   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3311   m      = B->rmap->n;
3312   cstart = B->cmap->rstart;
3313   cend   = B->cmap->rend;
3314   rstart = B->rmap->rstart;
3315 
3316   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3317 
3318 #if defined(PETSC_USE_DEBUGGING)
3319   for (i=0; i<m; i++) {
3320     nnz = Ii[i+1]- Ii[i];
3321     JJ  = J + Ii[i];
3322     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3323     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3324     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3325   }
3326 #endif
3327 
3328   for (i=0; i<m; i++) {
3329     nnz     = Ii[i+1]- Ii[i];
3330     JJ      = J + Ii[i];
3331     nnz_max = PetscMax(nnz_max,nnz);
3332     d       = 0;
3333     for (j=0; j<nnz; j++) {
3334       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3335     }
3336     d_nnz[i] = d;
3337     o_nnz[i] = nnz - d;
3338   }
3339   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3340   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3341 
3342   if (v) values = (PetscScalar*)v;
3343   else {
3344     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3345   }
3346 
3347   for (i=0; i<m; i++) {
3348     ii   = i + rstart;
3349     nnz  = Ii[i+1]- Ii[i];
3350     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3351   }
3352   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3353   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3354 
3355   if (!v) {
3356     ierr = PetscFree(values);CHKERRQ(ierr);
3357   }
3358   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3364 /*@
3365    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3366    (the default parallel PETSc format).
3367 
3368    Collective on MPI_Comm
3369 
3370    Input Parameters:
3371 +  B - the matrix
3372 .  i - the indices into j for the start of each local row (starts with zero)
3373 .  j - the column indices for each local row (starts with zero)
3374 -  v - optional values in the matrix
3375 
3376    Level: developer
3377 
3378    Notes:
3379        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3380      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3381      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3382 
3383        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3384 
3385        The format which is used for the sparse matrix input, is equivalent to a
3386     row-major ordering.. i.e for the following matrix, the input data expected is
3387     as shown:
3388 
3389         1 0 0
3390         2 0 3     P0
3391        -------
3392         4 5 6     P1
3393 
3394      Process0 [P0]: rows_owned=[0,1]
3395         i =  {0,1,3}  [size = nrow+1  = 2+1]
3396         j =  {0,0,2}  [size = nz = 6]
3397         v =  {1,2,3}  [size = nz = 6]
3398 
3399      Process1 [P1]: rows_owned=[2]
3400         i =  {0,3}    [size = nrow+1  = 1+1]
3401         j =  {0,1,2}  [size = nz = 6]
3402         v =  {4,5,6}  [size = nz = 6]
3403 
3404 .keywords: matrix, aij, compressed row, sparse, parallel
3405 
3406 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3407           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3408 @*/
3409 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3410 {
3411   PetscErrorCode ierr;
3412 
3413   PetscFunctionBegin;
3414   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3415   PetscFunctionReturn(0);
3416 }
3417 
3418 #undef __FUNCT__
3419 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3420 /*@C
3421    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3422    (the default parallel PETSc format).  For good matrix assembly performance
3423    the user should preallocate the matrix storage by setting the parameters
3424    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3425    performance can be increased by more than a factor of 50.
3426 
3427    Collective on MPI_Comm
3428 
3429    Input Parameters:
3430 +  B - the matrix
3431 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3432            (same value is used for all local rows)
3433 .  d_nnz - array containing the number of nonzeros in the various rows of the
3434            DIAGONAL portion of the local submatrix (possibly different for each row)
3435            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3436            The size of this array is equal to the number of local rows, i.e 'm'.
3437            For matrices that will be factored, you must leave room for (and set)
3438            the diagonal entry even if it is zero.
3439 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3440            submatrix (same value is used for all local rows).
3441 -  o_nnz - array containing the number of nonzeros in the various rows of the
3442            OFF-DIAGONAL portion of the local submatrix (possibly different for
3443            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3444            structure. The size of this array is equal to the number
3445            of local rows, i.e 'm'.
3446 
3447    If the *_nnz parameter is given then the *_nz parameter is ignored
3448 
3449    The AIJ format (also called the Yale sparse matrix format or
3450    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3451    storage.  The stored row and column indices begin with zero.
3452    See Users-Manual: ch_mat for details.
3453 
3454    The parallel matrix is partitioned such that the first m0 rows belong to
3455    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3456    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3457 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3467 
3468    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3469 
3470    You can call MatGetInfo() to get information on how effective the preallocation was;
3471    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3472    You can also run with the option -info and look for messages with the string
3473    malloc in them to see if additional memory allocation was needed.
3474 
3475    Example usage:
3476 
3477    Consider the following 8x8 matrix with 34 non-zero values, that is
3478    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3479    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3480    as follows:
3481 
3482 .vb
3483             1  2  0  |  0  3  0  |  0  4
3484     Proc0   0  5  6  |  7  0  0  |  8  0
3485             9  0 10  | 11  0  0  | 12  0
3486     -------------------------------------
3487            13  0 14  | 15 16 17  |  0  0
3488     Proc1   0 18  0  | 19 20 21  |  0  0
3489             0  0  0  | 22 23  0  | 24  0
3490     -------------------------------------
3491     Proc2  25 26 27  |  0  0 28  | 29  0
3492            30  0  0  | 31 32 33  |  0 34
3493 .ve
3494 
3495    This can be represented as a collection of submatrices as:
3496 
3497 .vb
3498       A B C
3499       D E F
3500       G H I
3501 .ve
3502 
3503    Where the submatrices A,B,C are owned by proc0, D,E,F are
3504    owned by proc1, G,H,I are owned by proc2.
3505 
3506    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3507    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3508    The 'M','N' parameters are 8,8, and have the same values on all procs.
3509 
3510    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3511    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3512    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3516 
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
3523 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3527 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
3532 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
3536 .vb
3537      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3538      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3539      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3540 .ve
3541    Here the space allocated is sum of all the above values i.e 34, and
3542    hence pre-allocation is perfect.
3543 
3544    Level: intermediate
3545 
3546 .keywords: matrix, aij, compressed row, sparse, parallel
3547 
3548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3549           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3550 @*/
3551 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3552 {
3553   PetscErrorCode ierr;
3554 
3555   PetscFunctionBegin;
3556   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3557   PetscValidType(B,1);
3558   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3559   PetscFunctionReturn(0);
3560 }
3561 
3562 #undef __FUNCT__
3563 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3564 /*@
3565      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3566          CSR format the local rows.
3567 
3568    Collective on MPI_Comm
3569 
3570    Input Parameters:
3571 +  comm - MPI communicator
3572 .  m - number of local rows (Cannot be PETSC_DECIDE)
3573 .  n - This value should be the same as the local size used in creating the
3574        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3575        calculated if N is given) For square matrices n is almost always m.
3576 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3577 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3578 .   i - row indices
3579 .   j - column indices
3580 -   a - matrix values
3581 
3582    Output Parameter:
3583 .   mat - the matrix
3584 
3585    Level: intermediate
3586 
3587    Notes:
3588        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3589      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3590      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3591 
3592        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3593 
3594        The format which is used for the sparse matrix input, is equivalent to a
3595     row-major ordering, i.e., for the following matrix, the input data expected is
3596     as shown:
3597 
3598         1 0 0
3599         2 0 3     P0
3600        -------
3601         4 5 6     P1
3602 
3603      Process0 [P0]: rows_owned=[0,1]
3604         i =  {0,1,3}  [size = nrow+1  = 2+1]
3605         j =  {0,0,2}  [size = local nz = 3]
3606         a =  {1,2,3}  [size = local nz = 3]
3607 
3608      Process1 [P1]: rows_owned=[2]
3609         i =  {0,3}    [size = nrow+1  = 1+1]
3610         j =  {0,1,2}  [size = local nz = 3]
3611         a =  {4,5,6}  [size = local nz = 3]
3612 
3613 .keywords: matrix, aij, compressed row, sparse, parallel
3614 
3615 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3616           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3617 @*/
3618 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3619 {
3620   PetscErrorCode ierr;
3621 
3622   PetscFunctionBegin;
3623   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3624   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3625   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3626   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3627   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3628   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3629   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 #undef __FUNCT__
3634 #define __FUNCT__ "MatCreateAIJ"
3635 /*@C
3636    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3637    (the default parallel PETSc format).  For good matrix assembly performance
3638    the user should preallocate the matrix storage by setting the parameters
3639    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3640    performance can be increased by more than a factor of 50.
3641 
3642    Collective on MPI_Comm
3643 
3644    Input Parameters:
3645 +  comm - MPI communicator
3646 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3647            This value should be the same as the local size used in creating the
3648            y vector for the matrix-vector product y = Ax.
3649 .  n - This value should be the same as the local size used in creating the
3650        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3651        calculated if N is given) For square matrices n is almost always m.
3652 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3653 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3654 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3655            (same value is used for all local rows)
3656 .  d_nnz - array containing the number of nonzeros in the various rows of the
3657            DIAGONAL portion of the local submatrix (possibly different for each row)
3658            or NULL, if d_nz is used to specify the nonzero structure.
3659            The size of this array is equal to the number of local rows, i.e 'm'.
3660 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3661            submatrix (same value is used for all local rows).
3662 -  o_nnz - array containing the number of nonzeros in the various rows of the
3663            OFF-DIAGONAL portion of the local submatrix (possibly different for
3664            each row) or NULL, if o_nz is used to specify the nonzero
3665            structure. The size of this array is equal to the number
3666            of local rows, i.e 'm'.
3667 
3668    Output Parameter:
3669 .  A - the matrix
3670 
3671    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3672    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3673    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3674 
3675    Notes:
3676    If the *_nnz parameter is given then the *_nz parameter is ignored
3677 
3678    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3679    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3680    storage requirements for this matrix.
3681 
3682    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3683    processor then it must be used on all processors that share the object for
3684    that argument.
3685 
3686    The user MUST specify either the local or global matrix dimensions
3687    (possibly both).
3688 
3689    The parallel matrix is partitioned across processors such that the
3690    first m0 rows belong to process 0, the next m1 rows belong to
3691    process 1, the next m2 rows belong to process 2 etc.. where
3692    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3693    values corresponding to [m x N] submatrix.
3694 
3695    The columns are logically partitioned with the n0 columns belonging
3696    to 0th partition, the next n1 columns belonging to the next
3697    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3698 
3699    The DIAGONAL portion of the local submatrix on any given processor
3700    is the submatrix corresponding to the rows and columns m,n
3701    corresponding to the given processor. i.e diagonal matrix on
3702    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3703    etc. The remaining portion of the local submatrix [m x (N-n)]
3704    constitute the OFF-DIAGONAL portion. The example below better
3705    illustrates this concept.
3706 
3707    For a square global matrix we define each processor's diagonal portion
3708    to be its local rows and the corresponding columns (a square submatrix);
3709    each processor's off-diagonal portion encompasses the remainder of the
3710    local matrix (a rectangular submatrix).
3711 
3712    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3713 
3714    When calling this routine with a single process communicator, a matrix of
3715    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3716    type of communicator, use the construction mechanism:
3717      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3718 
3719    By default, this format uses inodes (identical nodes) when possible.
3720    We search for consecutive rows with the same nonzero structure, thereby
3721    reusing matrix information to achieve increased efficiency.
3722 
3723    Options Database Keys:
3724 +  -mat_no_inode  - Do not use inodes
3725 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3726 -  -mat_aij_oneindex - Internally use indexing starting at 1
3727         rather than 0.  Note that when calling MatSetValues(),
3728         the user still MUST index entries starting at 0!
3729 
3730 
3731    Example usage:
3732 
3733    Consider the following 8x8 matrix with 34 non-zero values, that is
3734    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3735    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3736    as follows:
3737 
3738 .vb
3739             1  2  0  |  0  3  0  |  0  4
3740     Proc0   0  5  6  |  7  0  0  |  8  0
3741             9  0 10  | 11  0  0  | 12  0
3742     -------------------------------------
3743            13  0 14  | 15 16 17  |  0  0
3744     Proc1   0 18  0  | 19 20 21  |  0  0
3745             0  0  0  | 22 23  0  | 24  0
3746     -------------------------------------
3747     Proc2  25 26 27  |  0  0 28  | 29  0
3748            30  0  0  | 31 32 33  |  0 34
3749 .ve
3750 
3751    This can be represented as a collection of submatrices as:
3752 
3753 .vb
3754       A B C
3755       D E F
3756       G H I
3757 .ve
3758 
3759    Where the submatrices A,B,C are owned by proc0, D,E,F are
3760    owned by proc1, G,H,I are owned by proc2.
3761 
3762    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3763    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3764    The 'M','N' parameters are 8,8, and have the same values on all procs.
3765 
3766    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3767    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3768    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3769    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3770    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3771    matrix, and [DF] as another SeqAIJ matrix.
3772 
3773    When d_nz, o_nz parameters are specified, d_nz storage elements are
3774    allocated for every row of the local diagonal submatrix, and o_nz
3775    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3776    One way to choose d_nz and o_nz is to use the max nonzeros per local
3777    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3778    In this case, the values of d_nz,o_nz are:
3779 .vb
3780      proc0 : dnz = 2, o_nz = 2
3781      proc1 : dnz = 3, o_nz = 2
3782      proc2 : dnz = 1, o_nz = 4
3783 .ve
3784    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3785    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3786    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3787    34 values.
3788 
3789    When d_nnz, o_nnz parameters are specified, the storage is specified
3790    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3791    In the above case the values for d_nnz,o_nnz are:
3792 .vb
3793      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3794      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3795      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3796 .ve
3797    Here the space allocated is sum of all the above values i.e 34, and
3798    hence pre-allocation is perfect.
3799 
3800    Level: intermediate
3801 
3802 .keywords: matrix, aij, compressed row, sparse, parallel
3803 
3804 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3805           MPIAIJ, MatCreateMPIAIJWithArrays()
3806 @*/
3807 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3808 {
3809   PetscErrorCode ierr;
3810   PetscMPIInt    size;
3811 
3812   PetscFunctionBegin;
3813   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3814   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3815   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3816   if (size > 1) {
3817     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3818     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3819   } else {
3820     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3821     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3822   }
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 #undef __FUNCT__
3827 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3828 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3829 {
3830   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3831 
3832   PetscFunctionBegin;
3833   if (Ad)     *Ad     = a->A;
3834   if (Ao)     *Ao     = a->B;
3835   if (colmap) *colmap = a->garray;
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3841 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3842 {
3843   PetscErrorCode ierr;
3844   PetscInt       i;
3845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3846 
3847   PetscFunctionBegin;
3848   if (coloring->ctype == IS_COLORING_GLOBAL) {
3849     ISColoringValue *allcolors,*colors;
3850     ISColoring      ocoloring;
3851 
3852     /* set coloring for diagonal portion */
3853     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3854 
3855     /* set coloring for off-diagonal portion */
3856     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3857     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3858     for (i=0; i<a->B->cmap->n; i++) {
3859       colors[i] = allcolors[a->garray[i]];
3860     }
3861     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3862     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3863     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3864     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3865   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3866     ISColoringValue *colors;
3867     PetscInt        *larray;
3868     ISColoring      ocoloring;
3869 
3870     /* set coloring for diagonal portion */
3871     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3872     for (i=0; i<a->A->cmap->n; i++) {
3873       larray[i] = i + A->cmap->rstart;
3874     }
3875     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3876     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3877     for (i=0; i<a->A->cmap->n; i++) {
3878       colors[i] = coloring->colors[larray[i]];
3879     }
3880     ierr = PetscFree(larray);CHKERRQ(ierr);
3881     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3882     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3883     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3884 
3885     /* set coloring for off-diagonal portion */
3886     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3887     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3888     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3889     for (i=0; i<a->B->cmap->n; i++) {
3890       colors[i] = coloring->colors[larray[i]];
3891     }
3892     ierr = PetscFree(larray);CHKERRQ(ierr);
3893     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3894     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3895     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3896   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3897   PetscFunctionReturn(0);
3898 }
3899 
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3902 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3903 {
3904   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3905   PetscErrorCode ierr;
3906 
3907   PetscFunctionBegin;
3908   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3909   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3910   PetscFunctionReturn(0);
3911 }
3912 
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3915 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3916 {
3917   PetscErrorCode ierr;
3918   PetscInt       m,N,i,rstart,nnz,Ii;
3919   PetscInt       *indx;
3920   PetscScalar    *values;
3921 
3922   PetscFunctionBegin;
3923   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3924   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3925     PetscInt       *dnz,*onz,sum,bs,cbs;
3926 
3927     if (n == PETSC_DECIDE) {
3928       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3929     }
3930     /* Check sum(n) = N */
3931     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3932     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3933 
3934     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3935     rstart -= m;
3936 
3937     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3938     for (i=0; i<m; i++) {
3939       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3940       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3941       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3942     }
3943 
3944     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3945     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3946     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3947     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3948     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3949     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3950     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3951   }
3952 
3953   /* numeric phase */
3954   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3955   for (i=0; i<m; i++) {
3956     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3957     Ii   = i + rstart;
3958     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3959     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3960   }
3961   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3962   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
3965 
3966 #undef __FUNCT__
3967 #define __FUNCT__ "MatFileSplit"
3968 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3969 {
3970   PetscErrorCode    ierr;
3971   PetscMPIInt       rank;
3972   PetscInt          m,N,i,rstart,nnz;
3973   size_t            len;
3974   const PetscInt    *indx;
3975   PetscViewer       out;
3976   char              *name;
3977   Mat               B;
3978   const PetscScalar *values;
3979 
3980   PetscFunctionBegin;
3981   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3982   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3983   /* Should this be the type of the diagonal block of A? */
3984   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3985   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3986   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3987   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3988   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3989   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3990   for (i=0; i<m; i++) {
3991     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3992     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3993     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3994   }
3995   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3996   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3997 
3998   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3999   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4000   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4001   sprintf(name,"%s.%d",outfile,rank);
4002   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4003   ierr = PetscFree(name);CHKERRQ(ierr);
4004   ierr = MatView(B,out);CHKERRQ(ierr);
4005   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4006   ierr = MatDestroy(&B);CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4011 #undef __FUNCT__
4012 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4013 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4014 {
4015   PetscErrorCode      ierr;
4016   Mat_Merge_SeqsToMPI *merge;
4017   PetscContainer      container;
4018 
4019   PetscFunctionBegin;
4020   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4021   if (container) {
4022     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4023     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4024     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4025     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4026     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4027     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4028     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4029     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4030     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4031     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4032     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4033     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4034     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4035     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4036     ierr = PetscFree(merge);CHKERRQ(ierr);
4037     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4038   }
4039   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4040   PetscFunctionReturn(0);
4041 }
4042 
4043 #include <../src/mat/utils/freespace.h>
4044 #include <petscbt.h>
4045 
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4048 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4049 {
4050   PetscErrorCode      ierr;
4051   MPI_Comm            comm;
4052   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4053   PetscMPIInt         size,rank,taga,*len_s;
4054   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4055   PetscInt            proc,m;
4056   PetscInt            **buf_ri,**buf_rj;
4057   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4058   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4059   MPI_Request         *s_waits,*r_waits;
4060   MPI_Status          *status;
4061   MatScalar           *aa=a->a;
4062   MatScalar           **abuf_r,*ba_i;
4063   Mat_Merge_SeqsToMPI *merge;
4064   PetscContainer      container;
4065 
4066   PetscFunctionBegin;
4067   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4068   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4069 
4070   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4071   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4072 
4073   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4074   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4075 
4076   bi     = merge->bi;
4077   bj     = merge->bj;
4078   buf_ri = merge->buf_ri;
4079   buf_rj = merge->buf_rj;
4080 
4081   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4082   owners = merge->rowmap->range;
4083   len_s  = merge->len_s;
4084 
4085   /* send and recv matrix values */
4086   /*-----------------------------*/
4087   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4088   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4089 
4090   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4091   for (proc=0,k=0; proc<size; proc++) {
4092     if (!len_s[proc]) continue;
4093     i    = owners[proc];
4094     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4095     k++;
4096   }
4097 
4098   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4099   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4100   ierr = PetscFree(status);CHKERRQ(ierr);
4101 
4102   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4103   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4104 
4105   /* insert mat values of mpimat */
4106   /*----------------------------*/
4107   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4108   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4109 
4110   for (k=0; k<merge->nrecv; k++) {
4111     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4112     nrows       = *(buf_ri_k[k]);
4113     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4114     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4115   }
4116 
4117   /* set values of ba */
4118   m = merge->rowmap->n;
4119   for (i=0; i<m; i++) {
4120     arow = owners[rank] + i;
4121     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4122     bnzi = bi[i+1] - bi[i];
4123     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4124 
4125     /* add local non-zero vals of this proc's seqmat into ba */
4126     anzi   = ai[arow+1] - ai[arow];
4127     aj     = a->j + ai[arow];
4128     aa     = a->a + ai[arow];
4129     nextaj = 0;
4130     for (j=0; nextaj<anzi; j++) {
4131       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4132         ba_i[j] += aa[nextaj++];
4133       }
4134     }
4135 
4136     /* add received vals into ba */
4137     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4138       /* i-th row */
4139       if (i == *nextrow[k]) {
4140         anzi   = *(nextai[k]+1) - *nextai[k];
4141         aj     = buf_rj[k] + *(nextai[k]);
4142         aa     = abuf_r[k] + *(nextai[k]);
4143         nextaj = 0;
4144         for (j=0; nextaj<anzi; j++) {
4145           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4146             ba_i[j] += aa[nextaj++];
4147           }
4148         }
4149         nextrow[k]++; nextai[k]++;
4150       }
4151     }
4152     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4153   }
4154   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4155   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4156 
4157   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4158   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4159   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4160   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4161   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4162   PetscFunctionReturn(0);
4163 }
4164 
4165 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4166 
4167 #undef __FUNCT__
4168 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4169 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4170 {
4171   PetscErrorCode      ierr;
4172   Mat                 B_mpi;
4173   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4174   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4175   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4176   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4177   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4178   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4179   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4180   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4181   MPI_Status          *status;
4182   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4183   PetscBT             lnkbt;
4184   Mat_Merge_SeqsToMPI *merge;
4185   PetscContainer      container;
4186 
4187   PetscFunctionBegin;
4188   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4189 
4190   /* make sure it is a PETSc comm */
4191   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4193   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4194 
4195   ierr = PetscNew(&merge);CHKERRQ(ierr);
4196   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4197 
4198   /* determine row ownership */
4199   /*---------------------------------------------------------*/
4200   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4201   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4202   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4203   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4204   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4205   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4206   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4207 
4208   m      = merge->rowmap->n;
4209   owners = merge->rowmap->range;
4210 
4211   /* determine the number of messages to send, their lengths */
4212   /*---------------------------------------------------------*/
4213   len_s = merge->len_s;
4214 
4215   len          = 0; /* length of buf_si[] */
4216   merge->nsend = 0;
4217   for (proc=0; proc<size; proc++) {
4218     len_si[proc] = 0;
4219     if (proc == rank) {
4220       len_s[proc] = 0;
4221     } else {
4222       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4223       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4224     }
4225     if (len_s[proc]) {
4226       merge->nsend++;
4227       nrows = 0;
4228       for (i=owners[proc]; i<owners[proc+1]; i++) {
4229         if (ai[i+1] > ai[i]) nrows++;
4230       }
4231       len_si[proc] = 2*(nrows+1);
4232       len         += len_si[proc];
4233     }
4234   }
4235 
4236   /* determine the number and length of messages to receive for ij-structure */
4237   /*-------------------------------------------------------------------------*/
4238   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4239   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4240 
4241   /* post the Irecv of j-structure */
4242   /*-------------------------------*/
4243   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4244   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4245 
4246   /* post the Isend of j-structure */
4247   /*--------------------------------*/
4248   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4249 
4250   for (proc=0, k=0; proc<size; proc++) {
4251     if (!len_s[proc]) continue;
4252     i    = owners[proc];
4253     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4254     k++;
4255   }
4256 
4257   /* receives and sends of j-structure are complete */
4258   /*------------------------------------------------*/
4259   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4260   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4261 
4262   /* send and recv i-structure */
4263   /*---------------------------*/
4264   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4265   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4266 
4267   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4268   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4269   for (proc=0,k=0; proc<size; proc++) {
4270     if (!len_s[proc]) continue;
4271     /* form outgoing message for i-structure:
4272          buf_si[0]:                 nrows to be sent
4273                [1:nrows]:           row index (global)
4274                [nrows+1:2*nrows+1]: i-structure index
4275     */
4276     /*-------------------------------------------*/
4277     nrows       = len_si[proc]/2 - 1;
4278     buf_si_i    = buf_si + nrows+1;
4279     buf_si[0]   = nrows;
4280     buf_si_i[0] = 0;
4281     nrows       = 0;
4282     for (i=owners[proc]; i<owners[proc+1]; i++) {
4283       anzi = ai[i+1] - ai[i];
4284       if (anzi) {
4285         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4286         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4287         nrows++;
4288       }
4289     }
4290     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4291     k++;
4292     buf_si += len_si[proc];
4293   }
4294 
4295   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4296   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4297 
4298   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4299   for (i=0; i<merge->nrecv; i++) {
4300     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4301   }
4302 
4303   ierr = PetscFree(len_si);CHKERRQ(ierr);
4304   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4305   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4306   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4307   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4308   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4309   ierr = PetscFree(status);CHKERRQ(ierr);
4310 
4311   /* compute a local seq matrix in each processor */
4312   /*----------------------------------------------*/
4313   /* allocate bi array and free space for accumulating nonzero column info */
4314   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4315   bi[0] = 0;
4316 
4317   /* create and initialize a linked list */
4318   nlnk = N+1;
4319   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4320 
4321   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4322   len  = ai[owners[rank+1]] - ai[owners[rank]];
4323   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4324 
4325   current_space = free_space;
4326 
4327   /* determine symbolic info for each local row */
4328   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4329 
4330   for (k=0; k<merge->nrecv; k++) {
4331     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4332     nrows       = *buf_ri_k[k];
4333     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4334     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4335   }
4336 
4337   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4338   len  = 0;
4339   for (i=0; i<m; i++) {
4340     bnzi = 0;
4341     /* add local non-zero cols of this proc's seqmat into lnk */
4342     arow  = owners[rank] + i;
4343     anzi  = ai[arow+1] - ai[arow];
4344     aj    = a->j + ai[arow];
4345     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4346     bnzi += nlnk;
4347     /* add received col data into lnk */
4348     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4349       if (i == *nextrow[k]) { /* i-th row */
4350         anzi  = *(nextai[k]+1) - *nextai[k];
4351         aj    = buf_rj[k] + *nextai[k];
4352         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4353         bnzi += nlnk;
4354         nextrow[k]++; nextai[k]++;
4355       }
4356     }
4357     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4358 
4359     /* if free space is not available, make more free space */
4360     if (current_space->local_remaining<bnzi) {
4361       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4362       nspacedouble++;
4363     }
4364     /* copy data into free space, then initialize lnk */
4365     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4366     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4367 
4368     current_space->array           += bnzi;
4369     current_space->local_used      += bnzi;
4370     current_space->local_remaining -= bnzi;
4371 
4372     bi[i+1] = bi[i] + bnzi;
4373   }
4374 
4375   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4376 
4377   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4378   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4379   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4380 
4381   /* create symbolic parallel matrix B_mpi */
4382   /*---------------------------------------*/
4383   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4384   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4385   if (n==PETSC_DECIDE) {
4386     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4387   } else {
4388     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4389   }
4390   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4391   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4392   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4393   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4394   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4395 
4396   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4397   B_mpi->assembled    = PETSC_FALSE;
4398   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4399   merge->bi           = bi;
4400   merge->bj           = bj;
4401   merge->buf_ri       = buf_ri;
4402   merge->buf_rj       = buf_rj;
4403   merge->coi          = NULL;
4404   merge->coj          = NULL;
4405   merge->owners_co    = NULL;
4406 
4407   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4408 
4409   /* attach the supporting struct to B_mpi for reuse */
4410   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4411   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4412   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4413   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4414   *mpimat = B_mpi;
4415 
4416   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 #undef __FUNCT__
4421 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4422 /*@C
4423       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4424                  matrices from each processor
4425 
4426     Collective on MPI_Comm
4427 
4428    Input Parameters:
4429 +    comm - the communicator the parallel matrix will live on
4430 .    seqmat - the input sequential matrices
4431 .    m - number of local rows (or PETSC_DECIDE)
4432 .    n - number of local columns (or PETSC_DECIDE)
4433 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4434 
4435    Output Parameter:
4436 .    mpimat - the parallel matrix generated
4437 
4438     Level: advanced
4439 
4440    Notes:
4441      The dimensions of the sequential matrix in each processor MUST be the same.
4442      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4443      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4444 @*/
4445 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4446 {
4447   PetscErrorCode ierr;
4448   PetscMPIInt    size;
4449 
4450   PetscFunctionBegin;
4451   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4452   if (size == 1) {
4453     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4454     if (scall == MAT_INITIAL_MATRIX) {
4455       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4456     } else {
4457       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4458     }
4459     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4460     PetscFunctionReturn(0);
4461   }
4462   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4463   if (scall == MAT_INITIAL_MATRIX) {
4464     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4465   }
4466   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4467   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 #undef __FUNCT__
4472 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4473 /*@
4474      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4475           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4476           with MatGetSize()
4477 
4478     Not Collective
4479 
4480    Input Parameters:
4481 +    A - the matrix
4482 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4483 
4484    Output Parameter:
4485 .    A_loc - the local sequential matrix generated
4486 
4487     Level: developer
4488 
4489 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4490 
4491 @*/
4492 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4493 {
4494   PetscErrorCode ierr;
4495   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4496   Mat_SeqAIJ     *mat,*a,*b;
4497   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4498   MatScalar      *aa,*ba,*cam;
4499   PetscScalar    *ca;
4500   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4501   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4502   PetscBool      match;
4503   MPI_Comm       comm;
4504   PetscMPIInt    size;
4505 
4506   PetscFunctionBegin;
4507   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4508   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4509   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4511   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4512 
4513   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4514   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4515   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4516   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4517   aa = a->a; ba = b->a;
4518   if (scall == MAT_INITIAL_MATRIX) {
4519     if (size == 1) {
4520       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4521       PetscFunctionReturn(0);
4522     }
4523 
4524     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4525     ci[0] = 0;
4526     for (i=0; i<am; i++) {
4527       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4528     }
4529     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4530     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4531     k    = 0;
4532     for (i=0; i<am; i++) {
4533       ncols_o = bi[i+1] - bi[i];
4534       ncols_d = ai[i+1] - ai[i];
4535       /* off-diagonal portion of A */
4536       for (jo=0; jo<ncols_o; jo++) {
4537         col = cmap[*bj];
4538         if (col >= cstart) break;
4539         cj[k]   = col; bj++;
4540         ca[k++] = *ba++;
4541       }
4542       /* diagonal portion of A */
4543       for (j=0; j<ncols_d; j++) {
4544         cj[k]   = cstart + *aj++;
4545         ca[k++] = *aa++;
4546       }
4547       /* off-diagonal portion of A */
4548       for (j=jo; j<ncols_o; j++) {
4549         cj[k]   = cmap[*bj++];
4550         ca[k++] = *ba++;
4551       }
4552     }
4553     /* put together the new matrix */
4554     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4555     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4556     /* Since these are PETSc arrays, change flags to free them as necessary. */
4557     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4558     mat->free_a  = PETSC_TRUE;
4559     mat->free_ij = PETSC_TRUE;
4560     mat->nonew   = 0;
4561   } else if (scall == MAT_REUSE_MATRIX) {
4562     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4563     ci = mat->i; cj = mat->j; cam = mat->a;
4564     for (i=0; i<am; i++) {
4565       /* off-diagonal portion of A */
4566       ncols_o = bi[i+1] - bi[i];
4567       for (jo=0; jo<ncols_o; jo++) {
4568         col = cmap[*bj];
4569         if (col >= cstart) break;
4570         *cam++ = *ba++; bj++;
4571       }
4572       /* diagonal portion of A */
4573       ncols_d = ai[i+1] - ai[i];
4574       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4575       /* off-diagonal portion of A */
4576       for (j=jo; j<ncols_o; j++) {
4577         *cam++ = *ba++; bj++;
4578       }
4579     }
4580   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4581   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4582   PetscFunctionReturn(0);
4583 }
4584 
4585 #undef __FUNCT__
4586 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4587 /*@C
4588      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4589 
4590     Not Collective
4591 
4592    Input Parameters:
4593 +    A - the matrix
4594 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4595 -    row, col - index sets of rows and columns to extract (or NULL)
4596 
4597    Output Parameter:
4598 .    A_loc - the local sequential matrix generated
4599 
4600     Level: developer
4601 
4602 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4603 
4604 @*/
4605 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4606 {
4607   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4608   PetscErrorCode ierr;
4609   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4610   IS             isrowa,iscola;
4611   Mat            *aloc;
4612   PetscBool      match;
4613 
4614   PetscFunctionBegin;
4615   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4616   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4617   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4618   if (!row) {
4619     start = A->rmap->rstart; end = A->rmap->rend;
4620     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4621   } else {
4622     isrowa = *row;
4623   }
4624   if (!col) {
4625     start = A->cmap->rstart;
4626     cmap  = a->garray;
4627     nzA   = a->A->cmap->n;
4628     nzB   = a->B->cmap->n;
4629     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4630     ncols = 0;
4631     for (i=0; i<nzB; i++) {
4632       if (cmap[i] < start) idx[ncols++] = cmap[i];
4633       else break;
4634     }
4635     imark = i;
4636     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4637     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4638     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4639   } else {
4640     iscola = *col;
4641   }
4642   if (scall != MAT_INITIAL_MATRIX) {
4643     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4644     aloc[0] = *A_loc;
4645   }
4646   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4647   *A_loc = aloc[0];
4648   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4649   if (!row) {
4650     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4651   }
4652   if (!col) {
4653     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4654   }
4655   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4656   PetscFunctionReturn(0);
4657 }
4658 
4659 #undef __FUNCT__
4660 #define __FUNCT__ "MatGetBrowsOfAcols"
4661 /*@C
4662     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of local A
4663 
4664     Collective on Mat
4665 
4666    Input Parameters:
4667 +    A,B - the matrices in mpiaij format
4668 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4669 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4670 
4671    Output Parameter:
4672 +    rowb, colb - index sets of rows and columns of B to extract
4673 -    B_seq - the sequential matrix generated
4674 
4675     Level: developer
4676 
4677 @*/
4678 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4679 {
4680   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4681   PetscErrorCode ierr;
4682   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4683   IS             isrowb,iscolb;
4684   Mat            *bseq=NULL;
4685 
4686   PetscFunctionBegin;
4687   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4688     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4689   }
4690   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4691 
4692   if (scall == MAT_INITIAL_MATRIX) {
4693     start = A->cmap->rstart;
4694     cmap  = a->garray;
4695     nzA   = a->A->cmap->n;
4696     nzB   = a->B->cmap->n;
4697     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4698     ncols = 0;
4699     for (i=0; i<nzB; i++) {  /* row < local row index */
4700       if (cmap[i] < start) idx[ncols++] = cmap[i];
4701       else break;
4702     }
4703     imark = i;
4704     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4705     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4706     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4707     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4708   } else {
4709     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4710     isrowb  = *rowb; iscolb = *colb;
4711     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4712     bseq[0] = *B_seq;
4713   }
4714   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4715   *B_seq = bseq[0];
4716   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4717   if (!rowb) {
4718     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4719   } else {
4720     *rowb = isrowb;
4721   }
4722   if (!colb) {
4723     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4724   } else {
4725     *colb = iscolb;
4726   }
4727   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4728   PetscFunctionReturn(0);
4729 }
4730 
4731 #undef __FUNCT__
4732 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4733 /*
4734     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4735     of the OFF-DIAGONAL portion of local A
4736 
4737     Collective on Mat
4738 
4739    Input Parameters:
4740 +    A,B - the matrices in mpiaij format
4741 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4742 
4743    Output Parameter:
4744 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4745 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4746 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4747 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4748 
4749     Level: developer
4750 
4751 */
4752 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4753 {
4754   VecScatter_MPI_General *gen_to,*gen_from;
4755   PetscErrorCode         ierr;
4756   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4757   Mat_SeqAIJ             *b_oth;
4758   VecScatter             ctx =a->Mvctx;
4759   MPI_Comm               comm;
4760   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4761   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4762   PetscScalar            *rvalues,*svalues;
4763   MatScalar              *b_otha,*bufa,*bufA;
4764   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4765   MPI_Request            *rwaits = NULL,*swaits = NULL;
4766   MPI_Status             *sstatus,rstatus;
4767   PetscMPIInt            jj,size;
4768   PetscInt               *cols,sbs,rbs;
4769   PetscScalar            *vals;
4770 
4771   PetscFunctionBegin;
4772   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4773   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4774 
4775   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4776     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4777   }
4778   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4779   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4780 
4781   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4782   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4783   rvalues  = gen_from->values; /* holds the length of receiving row */
4784   svalues  = gen_to->values;   /* holds the length of sending row */
4785   nrecvs   = gen_from->n;
4786   nsends   = gen_to->n;
4787 
4788   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4789   srow    = gen_to->indices;    /* local row index to be sent */
4790   sstarts = gen_to->starts;
4791   sprocs  = gen_to->procs;
4792   sstatus = gen_to->sstatus;
4793   sbs     = gen_to->bs;
4794   rstarts = gen_from->starts;
4795   rprocs  = gen_from->procs;
4796   rbs     = gen_from->bs;
4797 
4798   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4799   if (scall == MAT_INITIAL_MATRIX) {
4800     /* i-array */
4801     /*---------*/
4802     /*  post receives */
4803     for (i=0; i<nrecvs; i++) {
4804       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4805       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4806       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4807     }
4808 
4809     /* pack the outgoing message */
4810     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4811 
4812     sstartsj[0] = 0;
4813     rstartsj[0] = 0;
4814     len         = 0; /* total length of j or a array to be sent */
4815     k           = 0;
4816     for (i=0; i<nsends; i++) {
4817       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4818       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4819       for (j=0; j<nrows; j++) {
4820         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4821         for (l=0; l<sbs; l++) {
4822           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4823 
4824           rowlen[j*sbs+l] = ncols;
4825 
4826           len += ncols;
4827           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4828         }
4829         k++;
4830       }
4831       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4832 
4833       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4834     }
4835     /* recvs and sends of i-array are completed */
4836     i = nrecvs;
4837     while (i--) {
4838       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4839     }
4840     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4841 
4842     /* allocate buffers for sending j and a arrays */
4843     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4844     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4845 
4846     /* create i-array of B_oth */
4847     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4848 
4849     b_othi[0] = 0;
4850     len       = 0; /* total length of j or a array to be received */
4851     k         = 0;
4852     for (i=0; i<nrecvs; i++) {
4853       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4854       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4855       for (j=0; j<nrows; j++) {
4856         b_othi[k+1] = b_othi[k] + rowlen[j];
4857         len        += rowlen[j]; k++;
4858       }
4859       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4860     }
4861 
4862     /* allocate space for j and a arrrays of B_oth */
4863     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4864     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4865 
4866     /* j-array */
4867     /*---------*/
4868     /*  post receives of j-array */
4869     for (i=0; i<nrecvs; i++) {
4870       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4871       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4872     }
4873 
4874     /* pack the outgoing message j-array */
4875     k = 0;
4876     for (i=0; i<nsends; i++) {
4877       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4878       bufJ  = bufj+sstartsj[i];
4879       for (j=0; j<nrows; j++) {
4880         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4881         for (ll=0; ll<sbs; ll++) {
4882           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4883           for (l=0; l<ncols; l++) {
4884             *bufJ++ = cols[l];
4885           }
4886           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4887         }
4888       }
4889       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4890     }
4891 
4892     /* recvs and sends of j-array are completed */
4893     i = nrecvs;
4894     while (i--) {
4895       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4896     }
4897     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4898   } else if (scall == MAT_REUSE_MATRIX) {
4899     sstartsj = *startsj_s;
4900     rstartsj = *startsj_r;
4901     bufa     = *bufa_ptr;
4902     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4903     b_otha   = b_oth->a;
4904   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4905 
4906   /* a-array */
4907   /*---------*/
4908   /*  post receives of a-array */
4909   for (i=0; i<nrecvs; i++) {
4910     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4911     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4912   }
4913 
4914   /* pack the outgoing message a-array */
4915   k = 0;
4916   for (i=0; i<nsends; i++) {
4917     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4918     bufA  = bufa+sstartsj[i];
4919     for (j=0; j<nrows; j++) {
4920       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4921       for (ll=0; ll<sbs; ll++) {
4922         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4923         for (l=0; l<ncols; l++) {
4924           *bufA++ = vals[l];
4925         }
4926         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4927       }
4928     }
4929     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4930   }
4931   /* recvs and sends of a-array are completed */
4932   i = nrecvs;
4933   while (i--) {
4934     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4935   }
4936   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4937   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4938 
4939   if (scall == MAT_INITIAL_MATRIX) {
4940     /* put together the new matrix */
4941     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4942 
4943     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4944     /* Since these are PETSc arrays, change flags to free them as necessary. */
4945     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4946     b_oth->free_a  = PETSC_TRUE;
4947     b_oth->free_ij = PETSC_TRUE;
4948     b_oth->nonew   = 0;
4949 
4950     ierr = PetscFree(bufj);CHKERRQ(ierr);
4951     if (!startsj_s || !bufa_ptr) {
4952       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4953       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4954     } else {
4955       *startsj_s = sstartsj;
4956       *startsj_r = rstartsj;
4957       *bufa_ptr  = bufa;
4958     }
4959   }
4960   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4961   PetscFunctionReturn(0);
4962 }
4963 
4964 #undef __FUNCT__
4965 #define __FUNCT__ "MatGetCommunicationStructs"
4966 /*@C
4967   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4968 
4969   Not Collective
4970 
4971   Input Parameters:
4972 . A - The matrix in mpiaij format
4973 
4974   Output Parameter:
4975 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4976 . colmap - A map from global column index to local index into lvec
4977 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4978 
4979   Level: developer
4980 
4981 @*/
4982 #if defined(PETSC_USE_CTABLE)
4983 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4984 #else
4985 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4986 #endif
4987 {
4988   Mat_MPIAIJ *a;
4989 
4990   PetscFunctionBegin;
4991   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4992   PetscValidPointer(lvec, 2);
4993   PetscValidPointer(colmap, 3);
4994   PetscValidPointer(multScatter, 4);
4995   a = (Mat_MPIAIJ*) A->data;
4996   if (lvec) *lvec = a->lvec;
4997   if (colmap) *colmap = a->colmap;
4998   if (multScatter) *multScatter = a->Mvctx;
4999   PetscFunctionReturn(0);
5000 }
5001 
5002 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5003 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5004 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5005 #if defined(PETSC_HAVE_ELEMENTAL)
5006 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5007 #endif
5008 
5009 #undef __FUNCT__
5010 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5011 /*
5012     Computes (B'*A')' since computing A*B directly is untenable
5013 
5014                n                       p                          p
5015         (              )       (              )         (                  )
5016       m (      A       )  *  n (       B      )   =   m (         C        )
5017         (              )       (              )         (                  )
5018 
5019 */
5020 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5021 {
5022   PetscErrorCode ierr;
5023   Mat            At,Bt,Ct;
5024 
5025   PetscFunctionBegin;
5026   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5027   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5028   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5029   ierr = MatDestroy(&At);CHKERRQ(ierr);
5030   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5031   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5032   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5033   PetscFunctionReturn(0);
5034 }
5035 
5036 #undef __FUNCT__
5037 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5038 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5039 {
5040   PetscErrorCode ierr;
5041   PetscInt       m=A->rmap->n,n=B->cmap->n;
5042   Mat            Cmat;
5043 
5044   PetscFunctionBegin;
5045   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5046   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5047   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5048   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5049   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5050   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5051   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5052   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5053 
5054   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5055 
5056   *C = Cmat;
5057   PetscFunctionReturn(0);
5058 }
5059 
5060 /* ----------------------------------------------------------------*/
5061 #undef __FUNCT__
5062 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5063 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5064 {
5065   PetscErrorCode ierr;
5066 
5067   PetscFunctionBegin;
5068   if (scall == MAT_INITIAL_MATRIX) {
5069     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5070     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5071     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5072   }
5073   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5074   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5075   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5076   PetscFunctionReturn(0);
5077 }
5078 
5079 /*MC
5080    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5081 
5082    Options Database Keys:
5083 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5084 
5085   Level: beginner
5086 
5087 .seealso: MatCreateAIJ()
5088 M*/
5089 
5090 #undef __FUNCT__
5091 #define __FUNCT__ "MatCreate_MPIAIJ"
5092 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5093 {
5094   Mat_MPIAIJ     *b;
5095   PetscErrorCode ierr;
5096   PetscMPIInt    size;
5097 
5098   PetscFunctionBegin;
5099   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5100 
5101   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5102   B->data       = (void*)b;
5103   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5104   B->assembled  = PETSC_FALSE;
5105   B->insertmode = NOT_SET_VALUES;
5106   b->size       = size;
5107 
5108   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5109 
5110   /* build cache for off array entries formed */
5111   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5112 
5113   b->donotstash  = PETSC_FALSE;
5114   b->colmap      = 0;
5115   b->garray      = 0;
5116   b->roworiented = PETSC_TRUE;
5117 
5118   /* stuff used for matrix vector multiply */
5119   b->lvec  = NULL;
5120   b->Mvctx = NULL;
5121 
5122   /* stuff for MatGetRow() */
5123   b->rowindices   = 0;
5124   b->rowvalues    = 0;
5125   b->getrowactive = PETSC_FALSE;
5126 
5127   /* flexible pointer used in CUSP/CUSPARSE classes */
5128   b->spptr = NULL;
5129 
5130   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5131   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5132   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5133   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5134   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5135   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5136   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5137   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5138   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5139   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5140   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5141 #if defined(PETSC_HAVE_ELEMENTAL)
5142   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5143 #endif
5144   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5145   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5146   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5147   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5148   PetscFunctionReturn(0);
5149 }
5150 
5151 #undef __FUNCT__
5152 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5153 /*@C
5154      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5155          and "off-diagonal" part of the matrix in CSR format.
5156 
5157    Collective on MPI_Comm
5158 
5159    Input Parameters:
5160 +  comm - MPI communicator
5161 .  m - number of local rows (Cannot be PETSC_DECIDE)
5162 .  n - This value should be the same as the local size used in creating the
5163        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5164        calculated if N is given) For square matrices n is almost always m.
5165 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5166 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5167 .   i - row indices for "diagonal" portion of matrix
5168 .   j - column indices
5169 .   a - matrix values
5170 .   oi - row indices for "off-diagonal" portion of matrix
5171 .   oj - column indices
5172 -   oa - matrix values
5173 
5174    Output Parameter:
5175 .   mat - the matrix
5176 
5177    Level: advanced
5178 
5179    Notes:
5180        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5181        must free the arrays once the matrix has been destroyed and not before.
5182 
5183        The i and j indices are 0 based
5184 
5185        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5186 
5187        This sets local rows and cannot be used to set off-processor values.
5188 
5189        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5190        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5191        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5192        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5193        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5194        communication if it is known that only local entries will be set.
5195 
5196 .keywords: matrix, aij, compressed row, sparse, parallel
5197 
5198 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5199           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5200 @*/
5201 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5202 {
5203   PetscErrorCode ierr;
5204   Mat_MPIAIJ     *maij;
5205 
5206   PetscFunctionBegin;
5207   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5208   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5209   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5210   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5211   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5212   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5213   maij = (Mat_MPIAIJ*) (*mat)->data;
5214 
5215   (*mat)->preallocated = PETSC_TRUE;
5216 
5217   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5218   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5219 
5220   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5221   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5222 
5223   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5224   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5225   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5226   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5227 
5228   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5229   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5230   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5231   PetscFunctionReturn(0);
5232 }
5233 
5234 /*
5235     Special version for direct calls from Fortran
5236 */
5237 #include <petsc/private/fortranimpl.h>
5238 
5239 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5240 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5241 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5242 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5243 #endif
5244 
5245 /* Change these macros so can be used in void function */
5246 #undef CHKERRQ
5247 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5248 #undef SETERRQ2
5249 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5250 #undef SETERRQ3
5251 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5252 #undef SETERRQ
5253 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5254 
5255 #undef __FUNCT__
5256 #define __FUNCT__ "matsetvaluesmpiaij_"
5257 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5258 {
5259   Mat            mat  = *mmat;
5260   PetscInt       m    = *mm, n = *mn;
5261   InsertMode     addv = *maddv;
5262   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5263   PetscScalar    value;
5264   PetscErrorCode ierr;
5265 
5266   MatCheckPreallocated(mat,1);
5267   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5268 
5269 #if defined(PETSC_USE_DEBUG)
5270   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5271 #endif
5272   {
5273     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5274     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5275     PetscBool roworiented = aij->roworiented;
5276 
5277     /* Some Variables required in the macro */
5278     Mat        A                 = aij->A;
5279     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5280     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5281     MatScalar  *aa               = a->a;
5282     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5283     Mat        B                 = aij->B;
5284     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5285     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5286     MatScalar  *ba               = b->a;
5287 
5288     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5289     PetscInt  nonew = a->nonew;
5290     MatScalar *ap1,*ap2;
5291 
5292     PetscFunctionBegin;
5293     for (i=0; i<m; i++) {
5294       if (im[i] < 0) continue;
5295 #if defined(PETSC_USE_DEBUG)
5296       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5297 #endif
5298       if (im[i] >= rstart && im[i] < rend) {
5299         row      = im[i] - rstart;
5300         lastcol1 = -1;
5301         rp1      = aj + ai[row];
5302         ap1      = aa + ai[row];
5303         rmax1    = aimax[row];
5304         nrow1    = ailen[row];
5305         low1     = 0;
5306         high1    = nrow1;
5307         lastcol2 = -1;
5308         rp2      = bj + bi[row];
5309         ap2      = ba + bi[row];
5310         rmax2    = bimax[row];
5311         nrow2    = bilen[row];
5312         low2     = 0;
5313         high2    = nrow2;
5314 
5315         for (j=0; j<n; j++) {
5316           if (roworiented) value = v[i*n+j];
5317           else value = v[i+j*m];
5318           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5319           if (in[j] >= cstart && in[j] < cend) {
5320             col = in[j] - cstart;
5321             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5322           } else if (in[j] < 0) continue;
5323 #if defined(PETSC_USE_DEBUG)
5324           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5325 #endif
5326           else {
5327             if (mat->was_assembled) {
5328               if (!aij->colmap) {
5329                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5330               }
5331 #if defined(PETSC_USE_CTABLE)
5332               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5333               col--;
5334 #else
5335               col = aij->colmap[in[j]] - 1;
5336 #endif
5337               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5338                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5339                 col  =  in[j];
5340                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5341                 B     = aij->B;
5342                 b     = (Mat_SeqAIJ*)B->data;
5343                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5344                 rp2   = bj + bi[row];
5345                 ap2   = ba + bi[row];
5346                 rmax2 = bimax[row];
5347                 nrow2 = bilen[row];
5348                 low2  = 0;
5349                 high2 = nrow2;
5350                 bm    = aij->B->rmap->n;
5351                 ba    = b->a;
5352               }
5353             } else col = in[j];
5354             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5355           }
5356         }
5357       } else if (!aij->donotstash) {
5358         if (roworiented) {
5359           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5360         } else {
5361           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5362         }
5363       }
5364     }
5365   }
5366   PetscFunctionReturnVoid();
5367 }
5368 
5369