xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0090e2cf8650914f034c1a2436476dfd4612a0b3)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to
22    use inodes when enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
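/*
   Usage sketch (an illustration, not part of the library source; M, N, nz, dnz, and onz are
   placeholder values supplied by the caller, and error checking is abbreviated):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,nz,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);

   MatSetType() may be replaced by -mat_type aij together with MatSetFromOptions().  Calling both
   preallocation routines, as recommended above, keeps the same code correct on both single-process
   and multi-process communicators; the preallocation call that does not apply is simply ignored.
*/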
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
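/*
   Calling sketch (an illustration only; gmat is assumed to be a square MATSEQAIJ matrix significant
   on rank 0, and mlocal is the number of rows this rank should own):

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call only moves new numerical values from process 0; the nonzero pattern
   established by the MAT_INITIAL_MATRIX call is kept.
*/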
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the diagonal and off-diagonal counts for each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the diagonal and off-diagonal counts for each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at a
402 slightly higher hash-table cost; without it, it is not scalable (each process
403 holds an order-N integer array) but lookups are fast.  A lookup sketch follows the routine below.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
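/*
   Lookup sketch (mirrors how MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below consult the
   colmap; gcol is a global column index and lcol becomes the local index into the off-diagonal
   block, or -1 if the column is not present):

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif

   The +1/-1 shift lets a stored value of zero mean "column not present" in both the hash table and
   the dense integer array.
*/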
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled; if so, we must
723      also disassemble ourselves so that we may reassemble. */
724   /*
725      if the nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled, so we can skip this step
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added into yy until the next line */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrices() call. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   /* do nondiagonal part */
1048   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1049   /* send it on its way */
1050   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   /* do local part */
1052   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1053   /* receive remote parts */
1054   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /*
1059   This only works correctly for square matrices where the subblock A->A is the
1060    diagonal block
1061 */
1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1063 {
1064   PetscErrorCode ierr;
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066 
1067   PetscFunctionBegin;
1068   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1069   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1070   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1075 {
1076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1077   PetscErrorCode ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1081   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1086 {
1087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091 #if defined(PETSC_USE_LOG)
1092   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1093 #endif
1094   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1095   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1096   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1097   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1098 #if defined(PETSC_USE_CTABLE)
1099   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1100 #else
1101   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1102 #endif
1103   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1105   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1106   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1107   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1108   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1109 
1110   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1111   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1112   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1118 #if defined(PETSC_HAVE_ELEMENTAL)
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1120 #endif
1121 #if defined(PETSC_HAVE_HYPRE)
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1124 #endif
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1129 {
1130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1131   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1132   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1133   PetscErrorCode ierr;
1134   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1135   int            fd;
1136   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1137   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1138   PetscScalar    *column_values;
1139   PetscInt       message_count,flowcontrolcount;
1140   FILE           *file;
1141 
1142   PetscFunctionBegin;
1143   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1144   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1145   nz   = A->nz + B->nz;
1146   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1147   if (!rank) {
1148     header[0] = MAT_FILE_CLASSID;
1149     header[1] = mat->rmap->N;
1150     header[2] = mat->cmap->N;
1151 
1152     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1153     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1154     /* get largest number of rows any processor has */
1155     rlen  = mat->rmap->n;
1156     range = mat->rmap->range;
1157     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1158   } else {
1159     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     rlen = mat->rmap->n;
1161   }
1162 
1163   /* load up the local row counts */
1164   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1165   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1166 
1167   /* store the row lengths to the file */
1168   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1169   if (!rank) {
1170     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     for (i=1; i<size; i++) {
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       rlen = range[i+1] - range[i];
1174       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     }
1177     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1178   } else {
1179     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1180     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1184 
1185   /* load up the local column indices */
1186   nzmax = nz; /* process 0 needs enough space to hold the largest nz of any process */
1187   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1189   cnt   = 0;
1190   for (i=0; i<mat->rmap->n; i++) {
1191     for (j=B->i[i]; j<B->i[i+1]; j++) {
1192       if ((col = garray[B->j[j]]) > cstart) break;
1193       column_indices[cnt++] = col;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1196     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1197   }
1198   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1199 
1200   /* store the column indices to the file */
1201   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1202   if (!rank) {
1203     MPI_Status status;
1204     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     for (i=1; i<size; i++) {
1206       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1207       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1208       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1209       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1218   }
1219   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1220 
1221   /* load up the local column values */
1222   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1223   cnt  = 0;
1224   for (i=0; i<mat->rmap->n; i++) {
1225     for (j=B->i[i]; j<B->i[i+1]; j++) {
1226       if (garray[B->j[j]] > cstart) break;
1227       column_values[cnt++] = B->a[j];
1228     }
1229     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1230     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1231   }
1232   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1233 
1234   /* store the column values to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     MPI_Status status;
1238     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1242       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1243       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1245     }
1246     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1247   } else {
1248     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1249     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(column_values);CHKERRQ(ierr);
1254 
1255   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1256   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #include <petscdraw.h>
1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1264   PetscErrorCode    ierr;
1265   PetscMPIInt       rank = aij->rank,size = aij->size;
1266   PetscBool         isdraw,iascii,isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1274   if (iascii) {
1275     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1278       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1279       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280       for (i=0; i<(PetscInt)size; i++) {
1281         nmax = PetscMax(nmax,nz[i]);
1282         nmin = PetscMin(nmin,nz[i]);
1283         navg += nz[i];
1284       }
1285       ierr = PetscFree(nz);CHKERRQ(ierr);
1286       navg = navg/size;
1287       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1288       PetscFunctionReturn(0);
1289     }
1290     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscBool inodes;
1294 
1295       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1296       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1297       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1298       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1299       if (!inodes) {
1300         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1301                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1302       } else {
1303         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1304                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1305       }
1306       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1308       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1309       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1310       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1312       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1313       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1314       PetscFunctionReturn(0);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount,inodelimit,*inodes;
1317       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1318       if (inodes) {
1319         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1322       }
1323       PetscFunctionReturn(0);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(0);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1330       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1331     } else {
1332       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1333     }
1334     PetscFunctionReturn(0);
1335   } else if (isdraw) {
1336     PetscDraw draw;
1337     PetscBool isnull;
1338     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1339     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1340     if (isnull) PetscFunctionReturn(0);
1341   }
1342 
1343   {
1344     /* assemble the entire matrix onto first processor. */
1345     Mat        A;
1346     Mat_SeqAIJ *Aloc;
1347     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1348     MatScalar  *a;
1349 
1350     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1351     if (!rank) {
1352       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1353     } else {
1354       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1355     }
1356     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1357     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1358     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1359     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1360     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1361 
1362     /* copy over the A part */
1363     Aloc = (Mat_SeqAIJ*)aij->A->data;
1364     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1365     row  = mat->rmap->rstart;
1366     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1367     for (i=0; i<m; i++) {
1368       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1369       row++;
1370       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1371     }
1372     aj = Aloc->j;
1373     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1374 
1375     /* copy over the B part */
1376     Aloc = (Mat_SeqAIJ*)aij->B->data;
1377     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1378     row  = mat->rmap->rstart;
1379     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1380     ct   = cols;
1381     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1382     for (i=0; i<m; i++) {
1383       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1384       row++;
1385       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1386     }
1387     ierr = PetscFree(ct);CHKERRQ(ierr);
1388     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390     /*
1391        Everyone has to call to draw the matrix since the graphics waits are
1392        synchronized across all processors that share the PetscDraw object
1393     */
1394     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1395     if (!rank) {
1396       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1397       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1398     }
1399     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1400     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1401     ierr = MatDestroy(&A);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1407 {
1408   PetscErrorCode ierr;
1409   PetscBool      iascii,isdraw,issocket,isbinary;
1410 
1411   PetscFunctionBegin;
1412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1414   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1415   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1416   if (iascii || isdraw || isbinary || issocket) {
1417     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1423 {
1424   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1425   PetscErrorCode ierr;
1426   Vec            bb1 = 0;
1427   PetscBool      hasop;
1428 
1429   PetscFunctionBegin;
1430   if (flag == SOR_APPLY_UPPER) {
1431     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1432     PetscFunctionReturn(0);
1433   }
1434 
1435   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1436     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1437   }
1438 
1439   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1440     if (flag & SOR_ZERO_INITIAL_GUESS) {
1441       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1442       its--;
1443     }
1444 
1445     while (its--) {
1446       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1447       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448 
1449       /* update rhs: bb1 = bb - B*x */
1450       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1451       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1452 
1453       /* local sweep */
1454       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1455     }
1456   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1457     if (flag & SOR_ZERO_INITIAL_GUESS) {
1458       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1459       its--;
1460     }
1461     while (its--) {
1462       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464 
1465       /* update rhs: bb1 = bb - B*x */
1466       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1467       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1468 
1469       /* local sweep */
1470       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1471     }
1472   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1473     if (flag & SOR_ZERO_INITIAL_GUESS) {
1474       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1475       its--;
1476     }
1477     while (its--) {
1478       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480 
1481       /* update rhs: bb1 = bb - B*x */
1482       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1483       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1484 
1485       /* local sweep */
1486       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1487     }
1488   } else if (flag & SOR_EISENSTAT) {
1489     Vec xx1;
1490 
1491     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1492     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1493 
1494     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496     if (!mat->diag) {
1497       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1498       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1499     }
1500     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1501     if (hasop) {
1502       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1503     } else {
1504       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1505     }
1506     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1507 
1508     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1509 
1510     /* local sweep */
1511     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1512     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1513     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1514   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1515 
1516   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1517 
1518   matin->factorerrortype = mat->A->factorerrortype;
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1523 {
1524   Mat            aA,aB,Aperm;
1525   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1526   PetscScalar    *aa,*ba;
1527   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1528   PetscSF        rowsf,sf;
1529   IS             parcolp = NULL;
1530   PetscBool      done;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1535   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1536   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1537   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1538 
1539   /* Invert row permutation to find out where my rows should go */
1540   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1541   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1542   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1543   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1544   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1545   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1546 
1547   /* Invert column permutation to find out where my columns should go */
1548   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1549   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1550   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1551   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1552   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1553   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1554   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1555 
1556   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1557   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1558   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1559 
1560   /* Find out where my gcols should go */
1561   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1562   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1563   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1564   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1565   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1566   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1567   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1568   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1569 
1570   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1571   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1572   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1573   for (i=0; i<m; i++) {
1574     PetscInt row = rdest[i],rowner;
1575     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1576     for (j=ai[i]; j<ai[i+1]; j++) {
1577       PetscInt cowner,col = cdest[aj[j]];
1578       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1579       if (rowner == cowner) dnnz[i]++;
1580       else onnz[i]++;
1581     }
1582     for (j=bi[i]; j<bi[i+1]; j++) {
1583       PetscInt cowner,col = gcdest[bj[j]];
1584       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1585       if (rowner == cowner) dnnz[i]++;
1586       else onnz[i]++;
1587     }
1588   }
1589   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1590   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1591   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1592   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1593   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1594 
1595   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1596   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1597   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1598   for (i=0; i<m; i++) {
1599     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1600     PetscInt j0,rowlen;
1601     rowlen = ai[i+1] - ai[i];
1602     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m, so insert the values in batches */
1603       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1604       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1605     }
1606     rowlen = bi[i+1] - bi[i];
1607     for (j0=j=0; j<rowlen; j0=j) {
1608       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1609       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1610     }
1611   }
1612   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1613   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1614   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1615   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1616   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1617   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1618   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1619   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1620   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1621   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1622   *B = Aperm;
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1627 {
1628   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1629   PetscErrorCode ierr;
1630 
1631   PetscFunctionBegin;
1632   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1633   if (ghosts) *ghosts = aij->garray;
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1638 {
1639   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1640   Mat            A    = mat->A,B = mat->B;
1641   PetscErrorCode ierr;
1642   PetscReal      isend[5],irecv[5];
1643 
1644   PetscFunctionBegin;
1645   info->block_size = 1.0;
1646   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1647 
1648   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649   isend[3] = info->memory;  isend[4] = info->mallocs;
1650 
1651   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654   isend[3] += info->memory;  isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   }
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1685 {
1686   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   switch (op) {
1691   case MAT_NEW_NONZERO_LOCATIONS:
1692   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1693   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1694   case MAT_KEEP_NONZERO_PATTERN:
1695   case MAT_NEW_NONZERO_LOCATION_ERR:
1696   case MAT_USE_INODES:
1697   case MAT_IGNORE_ZERO_ENTRIES:
1698     MatCheckPreallocated(A,1);
1699     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1700     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1701     break;
1702   case MAT_ROW_ORIENTED:
1703     MatCheckPreallocated(A,1);
1704     a->roworiented = flg;
1705 
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_NEW_DIAGONALS:
1710     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1711     break;
1712   case MAT_IGNORE_OFF_PROC_ENTRIES:
1713     a->donotstash = flg;
1714     break;
1715   case MAT_SPD:
1716     A->spd_set = PETSC_TRUE;
1717     A->spd     = flg;
1718     if (flg) {
1719       A->symmetric                  = PETSC_TRUE;
1720       A->structurally_symmetric     = PETSC_TRUE;
1721       A->symmetric_set              = PETSC_TRUE;
1722       A->structurally_symmetric_set = PETSC_TRUE;
1723     }
1724     break;
1725   case MAT_SYMMETRIC:
1726     MatCheckPreallocated(A,1);
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_STRUCTURALLY_SYMMETRIC:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_HERMITIAN:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_SYMMETRY_ETERNAL:
1738     MatCheckPreallocated(A,1);
1739     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1740     break;
1741   case MAT_SUBMAT_SINGLEIS:
1742     A->submat_singleis = flg;
1743     break;
1744   case MAT_STRUCTURE_ONLY:
1745     /* The option is handled directly by MatSetOption() */
1746     break;
1747   default:
1748     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1754 {
1755   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1756   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1757   PetscErrorCode ierr;
1758   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1759   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1760   PetscInt       *cmap,*idx_p;
1761 
1762   PetscFunctionBegin;
1763   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1764   mat->getrowactive = PETSC_TRUE;
1765 
1766   if (!mat->rowvalues && (idx || v)) {
1767     /*
1768         allocate enough space to hold information from the longest row.
1769     */
1770     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1771     PetscInt   max = 1,tmp;
1772     for (i=0; i<matin->rmap->n; i++) {
1773       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1774       if (max < tmp) max = tmp;
1775     }
1776     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1777   }
1778 
1779   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1780   lrow = row - rstart;
1781 
1782   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1783   if (!v)   {pvA = 0; pvB = 0;}
1784   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1785   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1786   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1787   nztot = nzA + nzB;
1788 
1789   cmap = mat->garray;
1790   if (v  || idx) {
1791     if (nztot) {
1792       /* Sort by increasing column numbers, assuming A and B already sorted */
1793       PetscInt imark = -1;
1794       if (v) {
1795         *v = v_p = mat->rowvalues;
1796         for (i=0; i<nzB; i++) {
1797           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1798           else break;
1799         }
1800         imark = i;
1801         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1802         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1803       }
1804       if (idx) {
1805         *idx = idx_p = mat->rowindices;
1806         if (imark > -1) {
1807           for (i=0; i<imark; i++) {
1808             idx_p[i] = cmap[cworkB[i]];
1809           }
1810         } else {
1811           for (i=0; i<nzB; i++) {
1812             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1813             else break;
1814           }
1815           imark = i;
1816         }
1817         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1818         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1819       }
1820     } else {
1821       if (idx) *idx = 0;
1822       if (v)   *v   = 0;
1823     }
1824   }
1825   *nz  = nztot;
1826   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1827   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1834 
1835   PetscFunctionBegin;
1836   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1837   aij->getrowactive = PETSC_FALSE;
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1842 {
1843   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1844   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1845   PetscErrorCode ierr;
1846   PetscInt       i,j,cstart = mat->cmap->rstart;
1847   PetscReal      sum = 0.0;
1848   MatScalar      *v;
1849 
1850   PetscFunctionBegin;
1851   if (aij->size == 1) {
1852     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1853   } else {
1854     if (type == NORM_FROBENIUS) {
1855       v = amat->a;
1856       for (i=0; i<amat->nz; i++) {
1857         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1858       }
1859       v = bmat->a;
1860       for (i=0; i<bmat->nz; i++) {
1861         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1862       }
1863       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1864       *norm = PetscSqrtReal(*norm);
1865       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1866     } else if (type == NORM_1) { /* max column norm */
1867       PetscReal *tmp,*tmp2;
1868       PetscInt  *jj,*garray = aij->garray;
1869       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1870       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1871       *norm = 0.0;
1872       v     = amat->a; jj = amat->j;
1873       for (j=0; j<amat->nz; j++) {
1874         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1875       }
1876       v = bmat->a; jj = bmat->j;
1877       for (j=0; j<bmat->nz; j++) {
1878         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1879       }
1880       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1881       for (j=0; j<mat->cmap->N; j++) {
1882         if (tmp2[j] > *norm) *norm = tmp2[j];
1883       }
1884       ierr = PetscFree(tmp);CHKERRQ(ierr);
1885       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1886       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1887     } else if (type == NORM_INFINITY) { /* max row norm */
1888       PetscReal ntemp = 0.0;
1889       for (j=0; j<aij->A->rmap->n; j++) {
1890         v   = amat->a + amat->i[j];
1891         sum = 0.0;
1892         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         v = bmat->a + bmat->i[j];
1896         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1897           sum += PetscAbsScalar(*v); v++;
1898         }
1899         if (sum > ntemp) ntemp = sum;
1900       }
1901       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1902       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1903     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1909 {
1910   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1911   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1912   PetscErrorCode ierr;
1913   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1914   PetscInt       cstart = A->cmap->rstart,ncol;
1915   Mat            B;
1916   MatScalar      *array;
1917 
1918   PetscFunctionBegin;
1919   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1920 
1921   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1922   ai = Aloc->i; aj = Aloc->j;
1923   bi = Bloc->i; bj = Bloc->j;
1924   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1925     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1926     PetscSFNode          *oloc;
1927     PETSC_UNUSED PetscSF sf;
1928 
1929     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1930     /* compute d_nnz for preallocation */
1931     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1932     for (i=0; i<ai[ma]; i++) {
1933       d_nnz[aj[i]]++;
1934       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1935     }
1936     /* compute local off-diagonal contributions */
1937     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1939     /* map those to global */
1940     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1941     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1942     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1943     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1944     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1945     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1946     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1947 
1948     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1949     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1950     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1951     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1952     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1953     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1954   } else {
1955     B    = *matout;
1956     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1957     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1958   }
1959 
1960   /* copy over the A part */
1961   array = Aloc->a;
1962   row   = A->rmap->rstart;
1963   for (i=0; i<ma; i++) {
1964     ncol = ai[i+1]-ai[i];
1965     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1966     row++;
1967     array += ncol; aj += ncol;
1968   }
1969   aj = Aloc->j;
1970   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1971 
1972   /* copy over the B part */
1973   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1974   array = Bloc->a;
1975   row   = A->rmap->rstart;
1976   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1977   cols_tmp = cols;
1978   for (i=0; i<mb; i++) {
1979     ncol = bi[i+1]-bi[i];
1980     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1981     row++;
1982     array += ncol; cols_tmp += ncol;
1983   }
1984   ierr = PetscFree(cols);CHKERRQ(ierr);
1985 
1986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1989     *matout = B;
1990   } else {
1991     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1992   }
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1997 {
1998   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1999   Mat            a    = aij->A,b = aij->B;
2000   PetscErrorCode ierr;
2001   PetscInt       s1,s2,s3;
2002 
2003   PetscFunctionBegin;
2004   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2005   if (rr) {
2006     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2007     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2008     /* Overlap communication with computation. */
2009     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2010   }
2011   if (ll) {
2012     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2013     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2014     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2015   }
2016   /* scale  the diagonal block */
2017   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2018 
2019   if (rr) {
2020     /* Do a scatter end and then right scale the off-diagonal block */
2021     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2022     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2023   }
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2028 {
2029   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2038 {
2039   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2040   Mat            a,b,c,d;
2041   PetscBool      flg;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   a = matA->A; b = matA->B;
2046   c = matB->A; d = matB->B;
2047 
2048   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2049   if (flg) {
2050     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2051   }
2052   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2057 {
2058   PetscErrorCode ierr;
2059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2060   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2061 
2062   PetscFunctionBegin;
2063   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2064   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2065     /* Because of the column compression in the off-processor part of the matrix a->B,
2066        the number of columns in a->B and b->B may be different, hence we cannot call
2067        MatCopy() directly on the two parts. If need be, a more efficient copy than
2068        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2069        then copying the submatrices. */
2070     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2071   } else {
2072     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2073     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2074   }
2075   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2080 {
2081   PetscErrorCode ierr;
2082 
2083   PetscFunctionBegin;
2084   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 /*
2089    Computes the number of nonzeros per row needed for preallocation when X and Y
2090    have different nonzero structure.
2091 */
2092 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2093 {
2094   PetscInt       i,j,k,nzx,nzy;
2095 
2096   PetscFunctionBegin;
2097   /* Set the number of nonzeros in the new matrix */
2098   for (i=0; i<m; i++) {
2099     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2100     nzx = xi[i+1] - xi[i];
2101     nzy = yi[i+1] - yi[i];
2102     nnz[i] = 0;
2103     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2104       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2105       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2106       nnz[i]++;
2107     }
2108     for (; k<nzy; k++) nnz[i]++;
2109   }
2110   PetscFunctionReturn(0);
2111 }
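
/*
   Worked example (illustration only, the column sets are hypothetical): if row i of X has
   global columns {0,3,7} and row i of Y has global columns {3,5}, the merge above counts
   0 and 7 from X, the shared column 3 exactly once, and 5 in the trailing sweep over Y,
   giving nnz[i] = 4, i.e. the size of the union of the two column sets.
*/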
2112 
2113 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2114 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2115 {
2116   PetscErrorCode ierr;
2117   PetscInt       m = Y->rmap->N;
2118   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2119   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2120 
2121   PetscFunctionBegin;
2122   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2127 {
2128   PetscErrorCode ierr;
2129   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2130   PetscBLASInt   bnz,one=1;
2131   Mat_SeqAIJ     *x,*y;
2132 
2133   PetscFunctionBegin;
2134   if (str == SAME_NONZERO_PATTERN) {
2135     PetscScalar alpha = a;
2136     x    = (Mat_SeqAIJ*)xx->A->data;
2137     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2138     y    = (Mat_SeqAIJ*)yy->A->data;
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140     x    = (Mat_SeqAIJ*)xx->B->data;
2141     y    = (Mat_SeqAIJ*)yy->B->data;
2142     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2143     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2144     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2145   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2146     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2147   } else {
2148     Mat      B;
2149     PetscInt *nnz_d,*nnz_o;
2150     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2151     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2152     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2153     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2154     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2155     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2156     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2157     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2158     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2159     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2160     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2161     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2162     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2163     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2169 
2170 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2171 {
2172 #if defined(PETSC_USE_COMPLEX)
2173   PetscErrorCode ierr;
2174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2178   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2179 #else
2180   PetscFunctionBegin;
2181 #endif
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2186 {
2187   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2192   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2197 {
2198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2203   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2210   PetscErrorCode ierr;
2211   PetscInt       i,*idxb = 0;
2212   PetscScalar    *va,*vb;
2213   Vec            vtmp;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2217   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2218   if (idx) {
2219     for (i=0; i<A->rmap->n; i++) {
2220       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2221     }
2222   }
2223 
2224   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2225   if (idx) {
2226     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2227   }
2228   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2229   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2230 
2231   for (i=0; i<A->rmap->n; i++) {
2232     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2233       va[i] = vb[i];
2234       if (idx) idx[i] = a->garray[idxb[i]];
2235     }
2236   }
2237 
2238   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2239   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2240   ierr = PetscFree(idxb);CHKERRQ(ierr);
2241   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2246 {
2247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2248   PetscErrorCode ierr;
2249   PetscInt       i,*idxb = 0;
2250   PetscScalar    *va,*vb;
2251   Vec            vtmp;
2252 
2253   PetscFunctionBegin;
2254   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2255   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2256   if (idx) {
2257     for (i=0; i<A->rmap->n; i++) {
2258       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2259     }
2260   }
2261 
2262   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2263   if (idx) {
2264     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2265   }
2266   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2267   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2268 
2269   for (i=0; i<A->rmap->n; i++) {
2270     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2271       va[i] = vb[i];
2272       if (idx) idx[i] = a->garray[idxb[i]];
2273     }
2274   }
2275 
2276   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2277   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2278   ierr = PetscFree(idxb);CHKERRQ(ierr);
2279   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2284 {
2285   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2286   PetscInt       n      = A->rmap->n;
2287   PetscInt       cstart = A->cmap->rstart;
2288   PetscInt       *cmap  = mat->garray;
2289   PetscInt       *diagIdx, *offdiagIdx;
2290   Vec            diagV, offdiagV;
2291   PetscScalar    *a, *diagA, *offdiagA;
2292   PetscInt       r;
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2297   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2298   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2299   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2300   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2301   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2302   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2303   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2304   for (r = 0; r < n; ++r) {
2305     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2306       a[r]   = diagA[r];
2307       idx[r] = cstart + diagIdx[r];
2308     } else {
2309       a[r]   = offdiagA[r];
2310       idx[r] = cmap[offdiagIdx[r]];
2311     }
2312   }
2313   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2314   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2315   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2316   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2317   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2318   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2323 {
2324   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2325   PetscInt       n      = A->rmap->n;
2326   PetscInt       cstart = A->cmap->rstart;
2327   PetscInt       *cmap  = mat->garray;
2328   PetscInt       *diagIdx, *offdiagIdx;
2329   Vec            diagV, offdiagV;
2330   PetscScalar    *a, *diagA, *offdiagA;
2331   PetscInt       r;
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2336   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2337   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2338   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2339   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2340   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2341   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2342   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2343   for (r = 0; r < n; ++r) {
2344     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2345       a[r]   = diagA[r];
2346       idx[r] = cstart + diagIdx[r];
2347     } else {
2348       a[r]   = offdiagA[r];
2349       idx[r] = cmap[offdiagIdx[r]];
2350     }
2351   }
2352   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2353   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2354   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2355   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2356   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2357   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2362 {
2363   PetscErrorCode ierr;
2364   Mat            *dummy;
2365 
2366   PetscFunctionBegin;
2367   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2368   *newmat = *dummy;
2369   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2374 {
2375   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2380   A->factorerrortype = a->A->factorerrortype;
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2385 {
2386   PetscErrorCode ierr;
2387   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2391   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2392   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2398 {
2399   PetscFunctionBegin;
2400   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2401   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 /*@
2406    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2407 
2408    Collective on Mat
2409 
2410    Input Parameters:
2411 +    A - the matrix
2412 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is PETSC_FALSE)
2413 
2414    Level: advanced
2415 
2416 @*/
2417 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2418 {
2419   PetscErrorCode       ierr;
2420 
2421   PetscFunctionBegin;
2422   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
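
/*
   Illustrative usage sketch (not part of this file's implementation): after a MATMPIAIJ
   matrix A has been assembled, a caller could request the scalable overlap algorithm
   directly,

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or through the -mat_increase_overlap_scalable option handled by MatSetFromOptions_MPIAIJ()
   below; subsequent calls to MatIncreaseOverlap() on A then dispatch to
   MatIncreaseOverlap_MPIAIJ_Scalable().
*/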
2425 
2426 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2427 {
2428   PetscErrorCode       ierr;
2429   PetscBool            sc = PETSC_FALSE,flg;
2430 
2431   PetscFunctionBegin;
2432   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2433   ierr = PetscObjectOptionsBegin((PetscObject)A);
2434     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2435     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2436     if (flg) {
2437       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2438     }
2439   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2444 {
2445   PetscErrorCode ierr;
2446   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2447   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2448 
2449   PetscFunctionBegin;
2450   if (!Y->preallocated) {
2451     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2452   } else if (!aij->nz) {
2453     PetscInt nonew = aij->nonew;
2454     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2455     aij->nonew = nonew;
2456   }
2457   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2462 {
2463   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2464   PetscErrorCode ierr;
2465 
2466   PetscFunctionBegin;
2467   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2468   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2469   if (d) {
2470     PetscInt rstart;
2471     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2472     *d += rstart;
2473 
2474   }
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 
2479 /* -------------------------------------------------------------------*/
2480 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2481                                        MatGetRow_MPIAIJ,
2482                                        MatRestoreRow_MPIAIJ,
2483                                        MatMult_MPIAIJ,
2484                                 /* 4*/ MatMultAdd_MPIAIJ,
2485                                        MatMultTranspose_MPIAIJ,
2486                                        MatMultTransposeAdd_MPIAIJ,
2487                                        0,
2488                                        0,
2489                                        0,
2490                                 /*10*/ 0,
2491                                        0,
2492                                        0,
2493                                        MatSOR_MPIAIJ,
2494                                        MatTranspose_MPIAIJ,
2495                                 /*15*/ MatGetInfo_MPIAIJ,
2496                                        MatEqual_MPIAIJ,
2497                                        MatGetDiagonal_MPIAIJ,
2498                                        MatDiagonalScale_MPIAIJ,
2499                                        MatNorm_MPIAIJ,
2500                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2501                                        MatAssemblyEnd_MPIAIJ,
2502                                        MatSetOption_MPIAIJ,
2503                                        MatZeroEntries_MPIAIJ,
2504                                 /*24*/ MatZeroRows_MPIAIJ,
2505                                        0,
2506                                        0,
2507                                        0,
2508                                        0,
2509                                 /*29*/ MatSetUp_MPIAIJ,
2510                                        0,
2511                                        0,
2512                                        MatGetDiagonalBlock_MPIAIJ,
2513                                        0,
2514                                 /*34*/ MatDuplicate_MPIAIJ,
2515                                        0,
2516                                        0,
2517                                        0,
2518                                        0,
2519                                 /*39*/ MatAXPY_MPIAIJ,
2520                                        MatCreateSubMatrices_MPIAIJ,
2521                                        MatIncreaseOverlap_MPIAIJ,
2522                                        MatGetValues_MPIAIJ,
2523                                        MatCopy_MPIAIJ,
2524                                 /*44*/ MatGetRowMax_MPIAIJ,
2525                                        MatScale_MPIAIJ,
2526                                        MatShift_MPIAIJ,
2527                                        MatDiagonalSet_MPIAIJ,
2528                                        MatZeroRowsColumns_MPIAIJ,
2529                                 /*49*/ MatSetRandom_MPIAIJ,
2530                                        0,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2535                                        0,
2536                                        MatSetUnfactored_MPIAIJ,
2537                                        MatPermute_MPIAIJ,
2538                                        0,
2539                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2540                                        MatDestroy_MPIAIJ,
2541                                        MatView_MPIAIJ,
2542                                        0,
2543                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2544                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2545                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2550                                        MatGetRowMinAbs_MPIAIJ,
2551                                        0,
2552                                        0,
2553                                        0,
2554                                        0,
2555                                 /*75*/ MatFDColoringApply_AIJ,
2556                                        MatSetFromOptions_MPIAIJ,
2557                                        0,
2558                                        0,
2559                                        MatFindZeroDiagonals_MPIAIJ,
2560                                 /*80*/ 0,
2561                                        0,
2562                                        0,
2563                                 /*83*/ MatLoad_MPIAIJ,
2564                                        0,
2565                                        0,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2570                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2571                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2572                                        MatPtAP_MPIAIJ_MPIAIJ,
2573                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2574                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                        0,
2579                                 /*99*/ 0,
2580                                        0,
2581                                        0,
2582                                        MatConjugate_MPIAIJ,
2583                                        0,
2584                                 /*104*/MatSetValuesRow_MPIAIJ,
2585                                        MatRealPart_MPIAIJ,
2586                                        MatImaginaryPart_MPIAIJ,
2587                                        0,
2588                                        0,
2589                                 /*109*/0,
2590                                        0,
2591                                        MatGetRowMin_MPIAIJ,
2592                                        0,
2593                                        MatMissingDiagonal_MPIAIJ,
2594                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2595                                        0,
2596                                        MatGetGhosts_MPIAIJ,
2597                                        0,
2598                                        0,
2599                                 /*119*/0,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        MatGetMultiProcBlock_MPIAIJ,
2604                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2605                                        MatGetColumnNorms_MPIAIJ,
2606                                        MatInvertBlockDiagonal_MPIAIJ,
2607                                        0,
2608                                        MatCreateSubMatricesMPI_MPIAIJ,
2609                                 /*129*/0,
2610                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2611                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2612                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2613                                        0,
2614                                 /*134*/0,
2615                                        0,
2616                                        MatRARt_MPIAIJ_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                 /*139*/MatSetBlockSizes_MPIAIJ,
2620                                        0,
2621                                        0,
2622                                        MatFDColoringSetUp_MPIXAIJ,
2623                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2624                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2625 };
2626 
2627 /* ----------------------------------------------------------------------------------------*/
2628 
2629 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2630 {
2631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2632   PetscErrorCode ierr;
2633 
2634   PetscFunctionBegin;
2635   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2636   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2641 {
2642   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2643   PetscErrorCode ierr;
2644 
2645   PetscFunctionBegin;
2646   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2647   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2648   PetscFunctionReturn(0);
2649 }
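/*
   Hedged usage sketch (illustrative only, not part of this file's compiled code): the two
   wrappers above simply forward MatStoreValues()/MatRetrieveValues() to the diagonal (A)
   and off-diagonal (B) blocks.  From the caller's side, assuming the nonzero pattern has
   been frozen first, the usual save/restore pattern is

      ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
      ... change numerical values, e.g. while rebuilding a preconditioner ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/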
2650 
2651 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2652 {
2653   Mat_MPIAIJ     *b;
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2658   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2659   b = (Mat_MPIAIJ*)B->data;
2660 
2661 #if defined(PETSC_USE_CTABLE)
2662   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2663 #else
2664   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2665 #endif
2666   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2667   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2668   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2669 
2670   /* Because B may have been resized we simply destroy it and create a new one each time */
2671   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2672   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2673   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2674   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2675   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2676   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2677 
2678   if (!B->preallocated) {
2679     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2680     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2681     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2682     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2683     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2684   }
2685 
2686   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2687   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2688   B->preallocated  = PETSC_TRUE;
2689   B->was_assembled = PETSC_FALSE;
2690   B->assembled     = PETSC_FALSE;
2691   PetscFunctionReturn(0);
2692 }
2693 
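/*
   Hedged usage sketch (illustrative only): the routine above is reached through the public
   MatMPIAIJSetPreallocation() interface.  A caller that knows an upper bound of, say, 5
   nonzeros per row in the diagonal block and 2 per row in the off-diagonal block might do

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Per-row counts can be supplied instead through the d_nnz/o_nnz arrays, as described in
   the MatMPIAIJSetPreallocation() manual page near the end of this file.
*/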
2694 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2695 {
2696   Mat            mat;
2697   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2698   PetscErrorCode ierr;
2699 
2700   PetscFunctionBegin;
2701   *newmat = 0;
2702   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2703   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2704   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2705   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2706   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2707   a       = (Mat_MPIAIJ*)mat->data;
2708 
2709   mat->factortype   = matin->factortype;
2710   mat->assembled    = PETSC_TRUE;
2711   mat->insertmode   = NOT_SET_VALUES;
2712   mat->preallocated = PETSC_TRUE;
2713 
2714   a->size         = oldmat->size;
2715   a->rank         = oldmat->rank;
2716   a->donotstash   = oldmat->donotstash;
2717   a->roworiented  = oldmat->roworiented;
2718   a->rowindices   = 0;
2719   a->rowvalues    = 0;
2720   a->getrowactive = PETSC_FALSE;
2721 
2722   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2723   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2724 
2725   if (oldmat->colmap) {
2726 #if defined(PETSC_USE_CTABLE)
2727     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2728 #else
2729     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2730     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2731     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2732 #endif
2733   } else a->colmap = 0;
2734   if (oldmat->garray) {
2735     PetscInt len;
2736     len  = oldmat->B->cmap->n;
2737     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2738     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2739     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2740   } else a->garray = 0;
2741 
2742   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2743   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2744   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2745   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2746   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2747   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2748   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2749   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2750   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2751   *newmat = mat;
2752   PetscFunctionReturn(0);
2753 }
2754 
2755 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2756 {
2757   PetscScalar    *vals,*svals;
2758   MPI_Comm       comm;
2759   PetscErrorCode ierr;
2760   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2761   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2762   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2763   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2764   PetscInt       cend,cstart,n,*rowners;
2765   int            fd;
2766   PetscInt       bs = newMat->rmap->bs;
2767 
2768   PetscFunctionBegin;
2769   /* force binary viewer to load .info file if it has not yet done so */
2770   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2771   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2772   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2773   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2774   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2775   if (!rank) {
2776     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2777     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2778     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2779   }
2780 
2781   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2782   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2783   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2784   if (bs < 0) bs = 1;
2785 
2786   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2787   M    = header[1]; N = header[2];
2788 
2789   /* If global sizes are set, check if they are consistent with those given in the file */
2790   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2791   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2792 
2793   /* determine ownership of all (block) rows */
2794   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2795   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2796   else m = newMat->rmap->n; /* Set by user */
2797 
2798   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2799   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2800 
2801   /* First process needs enough room for process with most rows */
2802   if (!rank) {
2803     mmax = rowners[1];
2804     for (i=2; i<=size; i++) {
2805       mmax = PetscMax(mmax, rowners[i]);
2806     }
2807   } else mmax = -1;             /* unused, but compilers complain */
2808 
2809   rowners[0] = 0;
2810   for (i=2; i<=size; i++) {
2811     rowners[i] += rowners[i-1];
2812   }
2813   rstart = rowners[rank];
2814   rend   = rowners[rank+1];
2815 
2816   /* distribute row lengths to all processors */
2817   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2818   if (!rank) {
2819     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2820     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2821     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2822     for (j=0; j<m; j++) {
2823       procsnz[0] += ourlens[j];
2824     }
2825     for (i=1; i<size; i++) {
2826       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2827       /* calculate the number of nonzeros on each processor */
2828       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2829         procsnz[i] += rowlengths[j];
2830       }
2831       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2832     }
2833     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2834   } else {
2835     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2836   }
2837 
2838   if (!rank) {
2839     /* determine max buffer needed and allocate it */
2840     maxnz = 0;
2841     for (i=0; i<size; i++) {
2842       maxnz = PetscMax(maxnz,procsnz[i]);
2843     }
2844     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2845 
2846     /* read in my part of the matrix column indices  */
2847     nz   = procsnz[0];
2848     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2849     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2850 
2851     /* read in everyone else's and ship them off */
2852     for (i=1; i<size; i++) {
2853       nz   = procsnz[i];
2854       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2855       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(cols);CHKERRQ(ierr);
2858   } else {
2859     /* determine buffer space needed for message */
2860     nz = 0;
2861     for (i=0; i<m; i++) {
2862       nz += ourlens[i];
2863     }
2864     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2865 
2866     /* receive message of column indices*/
2867     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2868   }
2869 
2870   /* determine column ownership if matrix is not square */
2871   if (N != M) {
2872     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2873     else n = newMat->cmap->n;
2874     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2875     cstart = cend - n;
2876   } else {
2877     cstart = rstart;
2878     cend   = rend;
2879     n      = cend - cstart;
2880   }
2881 
2882   /* loop over local rows, determining number of off-diagonal entries */
2883   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2884   jj   = 0;
2885   for (i=0; i<m; i++) {
2886     for (j=0; j<ourlens[i]; j++) {
2887       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2888       jj++;
2889     }
2890   }
2891 
2892   for (i=0; i<m; i++) {
2893     ourlens[i] -= offlens[i];
2894   }
2895   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2896 
2897   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2898 
2899   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2900 
2901   for (i=0; i<m; i++) {
2902     ourlens[i] += offlens[i];
2903   }
2904 
2905   if (!rank) {
2906     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2907 
2908     /* read in my part of the matrix numerical values  */
2909     nz   = procsnz[0];
2910     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2911 
2912     /* insert into matrix */
2913     jj      = rstart;
2914     smycols = mycols;
2915     svals   = vals;
2916     for (i=0; i<m; i++) {
2917       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2918       smycols += ourlens[i];
2919       svals   += ourlens[i];
2920       jj++;
2921     }
2922 
2923     /* read in other processors and ship out */
2924     for (i=1; i<size; i++) {
2925       nz   = procsnz[i];
2926       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2927       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2928     }
2929     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2930   } else {
2931     /* receive numeric values */
2932     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2933 
2934     /* receive message of values*/
2935     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2936 
2937     /* insert into matrix */
2938     jj      = rstart;
2939     smycols = mycols;
2940     svals   = vals;
2941     for (i=0; i<m; i++) {
2942       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2943       smycols += ourlens[i];
2944       svals   += ourlens[i];
2945       jj++;
2946     }
2947   }
2948   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2949   ierr = PetscFree(vals);CHKERRQ(ierr);
2950   ierr = PetscFree(mycols);CHKERRQ(ierr);
2951   ierr = PetscFree(rowners);CHKERRQ(ierr);
2952   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956 
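/*
   Hedged usage sketch (illustrative only): MatLoad_MPIAIJ() above is reached through the
   generic MatLoad() interface when reading a matrix stored in PETSc binary format, e.g.

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The block size used while loading can be adjusted with -matload_block_size, which is read
   in the options block at the top of the routine.
*/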
2957 /* Not scalable because of ISAllGather() unless getting all columns. */
2958 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2959 {
2960   PetscErrorCode ierr;
2961   IS             iscol_local;
2962   PetscBool      isstride;
2963   PetscMPIInt    lisstride=0,gisstride;
2964 
2965   PetscFunctionBegin;
2966   /* check if we are grabbing all columns*/
2967   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2968 
2969   if (isstride) {
2970     PetscInt  start,len,mstart,mlen;
2971     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2972     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2973     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2974     if (mstart == start && mlen-mstart == len) lisstride = 1;
2975   }
2976 
2977   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2978   if (gisstride) {
2979     PetscInt N;
2980     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2981     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2982     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2983     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2984   } else {
2985     PetscInt cbs;
2986     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2987     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2988     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2989   }
2990 
2991   *isseq = iscol_local;
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 /*
2996  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
2997  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
2998 
2999  Input Parameters:
3000    mat - matrix
3001    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3002            i.e., mat->rstart <= isrow[i] < mat->rend
3003    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3004            i.e., mat->cstart <= iscol[i] < mat->cend
3005  Output Parameter:
3006    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3007    iscol_o - sequential column index set for retrieving mat->B
3008    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3009  */
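/*
   Hypothetical illustration (a hedged sketch, not taken from the source): with two ranks,
   8 global columns split as 0..3 (rank 0) and 4..7 (rank 1), a->garray = {5,7} on rank 0,
   and iscol = {0,2} on rank 0 and {5} on rank 1, rank 0 would obtain iscol_d = {0,2},
   iscol_o = {0} (the column of mat->B holding global column 5), and garray = {2}, since
   global column 5 is entry 2 of the concatenated iscol = {0,2,5}.
*/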
3010 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3011 {
3012   PetscErrorCode ierr;
3013   Vec            x,cmap;
3014   const PetscInt *is_idx;
3015   PetscScalar    *xarray,*cmaparray;
3016   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3017   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3018   Mat            B=a->B;
3019   Vec            lvec=a->lvec,lcmap;
3020   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3021   MPI_Comm       comm;
3022 
3023   PetscFunctionBegin;
3024   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3025   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3026 
3027   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3028   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3029   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3030   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3031 
3032   /* Get start indices */
3033   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3034   isstart -= ncols;
3035   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3036 
3037   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3038   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3039   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3040   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3041   for (i=0; i<ncols; i++) {
3042     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3043     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3044     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3045   }
3046   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3048   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3049 
3050   /* Get iscol_d */
3051   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3052   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3053   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3054 
3055   /* Get isrow_d */
3056   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3057   rstart = mat->rmap->rstart;
3058   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3059   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3060   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3061   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3062 
3063   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3064   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3065   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3066 
3067   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3068   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3069 
3070   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3071 
3072   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3073   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3074   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3075 
3076   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3077   /* off-process column indices */
3078   count = 0;
3079   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3080   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3081 
3082   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3083   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3084   for (i=0; i<Bn; i++) {
3085     if (PetscRealPart(xarray[i]) > -1.0) {
3086       idx[count]     = i;                   /* local column index in off-diagonal part B */
3087       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3088     }
3089   }
3090   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3091   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3092 
3093   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3094   /* cannot ensure iscol_o has same blocksize as iscol! */
3095 
3096   ierr = PetscFree(idx);CHKERRQ(ierr);
3097 
3098   *garray = cmap1;
3099 
3100   ierr = VecDestroy(&x);CHKERRQ(ierr);
3101   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3102   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3103   PetscFunctionReturn(0);
3104 }
3105 
3106 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3107 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3108 {
3109   PetscErrorCode ierr;
3110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3111   Mat            M = NULL;
3112   MPI_Comm       comm;
3113   IS             iscol_d,isrow_d,iscol_o;
3114   Mat            Asub = NULL,Bsub = NULL;
3115   PetscInt       n;
3116 
3117   PetscFunctionBegin;
3118   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3119 
3120   if (call == MAT_REUSE_MATRIX) {
3121     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3122     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3123     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3124 
3125     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3126     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3127 
3128     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3129     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3130 
3131     /* Update diagonal and off-diagonal portions of submat */
3132     asub = (Mat_MPIAIJ*)(*submat)->data;
3133     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3134     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3135     if (n) {
3136       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3137     }
3138     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3139     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140 
3141   } else { /* call == MAT_INITIAL_MATRIX */
3142     const PetscInt *garray;
3143     PetscInt        BsubN;
3144 
3145     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3146     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3147 
3148     /* Create local submatrices Asub and Bsub */
3149     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3150     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3151 
3152     /* Create submatrix M */
3153     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3154 
3155     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3156     asub = (Mat_MPIAIJ*)M->data;
3157 
3158     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3159     n = asub->B->cmap->N;
3160     if (BsubN > n) {
3161       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3162       const PetscInt *idx;
3163       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3164       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3165 
3166       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3167       j = 0;
3168       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3169       for (i=0; i<n; i++) {
3170         if (j >= BsubN) break;
3171         while (subgarray[i] > garray[j]) j++;
3172 
3173         if (subgarray[i] == garray[j]) {
3174           idx_new[i] = idx[j++];
3175         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3176       }
3177       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3178 
3179       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3180       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3181 
3182     } else if (BsubN < n) {
3183       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N);
3184     }
3185 
3186     ierr = PetscFree(garray);CHKERRQ(ierr);
3187     *submat = M;
3188 
3189     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3190     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3191     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3192 
3193     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3194     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3195 
3196     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3197     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3198   }
3199   PetscFunctionReturn(0);
3200 }
3201 
3202 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3203 {
3204   PetscErrorCode ierr;
3205   IS             iscol_local=NULL,isrow_d;
3206   PetscInt       csize;
3207   PetscInt       n,i,j,start,end;
3208   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3209   MPI_Comm       comm;
3210 
3211   PetscFunctionBegin;
3212   /* If isrow has same processor distribution as mat,
3213      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3214   if (call == MAT_REUSE_MATRIX) {
3215     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3216     if (isrow_d) {
3217       sameRowDist  = PETSC_TRUE;
3218       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3219     } else {
3220       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3221       if (iscol_local) {
3222         sameRowDist  = PETSC_TRUE;
3223         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3224       }
3225     }
3226   } else {
3227     /* Check if isrow has same processor distribution as mat */
3228     sameDist[0] = PETSC_FALSE;
3229     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3230     if (!n) {
3231       sameDist[0] = PETSC_TRUE;
3232     } else {
3233       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3234       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3235       if (i >= start && j < end) {
3236         sameDist[0] = PETSC_TRUE;
3237       }
3238     }
3239 
3240     /* Check if iscol has same processor distribution as mat */
3241     sameDist[1] = PETSC_FALSE;
3242     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3243     if (!n) {
3244       sameDist[1] = PETSC_TRUE;
3245     } else {
3246       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3247       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3248       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3249     }
3250 
3251     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3252     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3253     sameRowDist = tsameDist[0];
3254   }
3255 
3256   if (sameRowDist) {
3257     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3258       /* isrow and iscol have same processor distribution as mat */
3259       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3260       PetscFunctionReturn(0);
3261     } else { /* sameRowDist */
3262       /* isrow has same processor distribution as mat */
3263       if (call == MAT_INITIAL_MATRIX) {
3264         PetscBool sorted;
3265         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3266         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3267         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3268         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3269 
3270         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3271         if (sorted) {
3272           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3273           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3274           PetscFunctionReturn(0);
3275         }
3276       } else { /* call == MAT_REUSE_MATRIX */
3277         IS    iscol_sub;
3278         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3279         if (iscol_sub) {
3280           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3281           PetscFunctionReturn(0);
3282         }
3283       }
3284     }
3285   }
3286 
3287   /* General case: iscol -> iscol_local which has global size of iscol */
3288   if (call == MAT_REUSE_MATRIX) {
3289     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3290     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3291   } else {
3292     if (!iscol_local) {
3293       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3294     }
3295   }
3296 
3297   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3298   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3299 
3300   if (call == MAT_INITIAL_MATRIX) {
3301     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3302     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3303   }
3304   PetscFunctionReturn(0);
3305 }
3306 
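/*
   Hedged usage sketch (illustrative only): the dispatcher above backs the public
   MatCreateSubMatrix() interface for MATMPIAIJ.  Extracting the locally owned rows and
   columns of a matrix A might look like

      Mat      S;
      IS       isrow,iscol;
      PetscInt rstart,rend,cstart,cend;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ... later, with the same index sets ...
      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/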
3307 /*@C
3308      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3309          and "off-diagonal" parts of the matrix in CSR format.
3310 
3311    Collective on MPI_Comm
3312 
3313    Input Parameters:
3314 +  comm - MPI communicator
3315 .  A - "diagonal" portion of matrix
3316 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3317 -  garray - global index of B columns
3318 
3319    Output Parameter:
3320 .   mat - the matrix, with input A as its local diagonal matrix
3321    Level: advanced
3322 
3323    Notes:
3324        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3325        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3326 
3327 .seealso: MatCreateMPIAIJWithSplitArrays()
3328 @*/
3329 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3330 {
3331   PetscErrorCode ierr;
3332   Mat_MPIAIJ     *maij;
3333   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3334   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3335   PetscScalar    *oa=b->a;
3336   Mat            Bnew;
3337   PetscInt       m,n,N;
3338 
3339   PetscFunctionBegin;
3340   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3341   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3342   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3343   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3344   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3345   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3346 
3347   /* Get global columns of mat */
3348   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3349 
3350   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3351   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3352   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3353   maij = (Mat_MPIAIJ*)(*mat)->data;
3354 
3355   (*mat)->preallocated = PETSC_TRUE;
3356 
3357   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3358   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3359 
3360   /* Set A as diagonal portion of *mat */
3361   maij->A = A;
3362 
3363   nz = oi[m];
3364   for (i=0; i<nz; i++) {
3365     col   = oj[i];
3366     oj[i] = garray[col];
3367   }
3368 
3369    /* Set Bnew as off-diagonal portion of *mat */
3370   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3371   bnew        = (Mat_SeqAIJ*)Bnew->data;
3372   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3373   maij->B     = Bnew;
3374 
3375   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3376 
3377   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3378   b->free_a       = PETSC_FALSE;
3379   b->free_ij      = PETSC_FALSE;
3380   ierr = MatDestroy(&B);CHKERRQ(ierr);
3381 
3382   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3383   bnew->free_a       = PETSC_TRUE;
3384   bnew->free_ij      = PETSC_TRUE;
3385 
3386   /* condense columns of maij->B */
3387   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3388   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3389   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3390   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3391   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3392   PetscFunctionReturn(0);
3393 }
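/*
   Hedged usage sketch (illustrative only): on each rank one supplies the sequential
   diagonal block Ad (the locally owned rows and columns), the sequential off-diagonal
   block Ao (same local rows, with compacted column indices), and garray[] giving the
   global column of each compacted column of Ao:

      Mat C;
      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);CHKERRQ(ierr);

   Afterwards Ad is owned by C and Ao has been destroyed inside the call, so the caller
   must not use either of them again (see the Notes in the manual page above).
*/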
3394 
3395 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3396 
3397 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3398 {
3399   PetscErrorCode ierr;
3400   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3401   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3402   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3403   Mat            M,Msub,B=a->B;
3404   MatScalar      *aa;
3405   Mat_SeqAIJ     *aij;
3406   PetscInt       *garray = a->garray,*colsub,Ncols;
3407   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3408   IS             iscol_sub,iscmap;
3409   const PetscInt *is_idx,*cmap;
3410   PetscBool      allcolumns=PETSC_FALSE;
3411   MPI_Comm       comm;
3412 
3413   PetscFunctionBegin;
3414   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3415 
3416   if (call == MAT_REUSE_MATRIX) {
3417     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3418     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3419     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3420 
3421     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3422     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3423 
3424     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3425     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3426 
3427     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3428 
3429   } else { /* call == MAT_INITIAL_MATRIX */
3430     PetscBool flg;
3431 
3432     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3433     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3434 
3435     /* (1) iscol -> nonscalable iscol_local */
3436     /* Check for special case: each processor gets entire matrix columns */
3437     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3438     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3439     if (allcolumns) {
3440       iscol_sub = iscol_local;
3441       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3442       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3443 
3444     } else {
3445       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3446       PetscInt *idx,*cmap1,k;
3447       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3448       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3449       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3450       count = 0;
3451       k     = 0;
3452       for (i=0; i<Ncols; i++) {
3453         j = is_idx[i];
3454         if (j >= cstart && j < cend) {
3455           /* diagonal part of mat */
3456           idx[count]     = j;
3457           cmap1[count++] = i; /* column index in submat */
3458         } else if (Bn) {
3459           /* off-diagonal part of mat */
3460           if (j == garray[k]) {
3461             idx[count]     = j;
3462             cmap1[count++] = i;  /* column index in submat */
3463           } else if (j > garray[k]) {
3464             while (j > garray[k] && k < Bn-1) k++;
3465             if (j == garray[k]) {
3466               idx[count]     = j;
3467               cmap1[count++] = i; /* column index in submat */
3468             }
3469           }
3470         }
3471       }
3472       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3473 
3474       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3475       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3476       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3477 
3478       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3479     }
3480 
3481     /* (3) Create sequential Msub */
3482     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3483   }
3484 
3485   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3486   aij  = (Mat_SeqAIJ*)(Msub)->data;
3487   ii   = aij->i;
3488   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3489 
3490   /*
3491       m - number of local rows
3492       Ncols - number of columns (same on all processors)
3493       rstart - first row in new global matrix generated
3494   */
3495   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3496 
3497   if (call == MAT_INITIAL_MATRIX) {
3498     /* (4) Create parallel newmat */
3499     PetscMPIInt    rank,size;
3500     PetscInt       csize;
3501 
3502     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3503     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3504 
3505     /*
3506         Determine the number of non-zeros in the diagonal and off-diagonal
3507         portions of the matrix in order to do correct preallocation
3508     */
3509 
3510     /* first get start and end of "diagonal" columns */
3511     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3512     if (csize == PETSC_DECIDE) {
3513       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3514       if (mglobal == Ncols) { /* square matrix */
3515         nlocal = m;
3516       } else {
3517         nlocal = Ncols/size + ((Ncols % size) > rank);
3518       }
3519     } else {
3520       nlocal = csize;
3521     }
3522     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3523     rstart = rend - nlocal;
3524     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3525 
3526     /* next, compute all the lengths */
3527     jj    = aij->j;
3528     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3529     olens = dlens + m;
3530     for (i=0; i<m; i++) {
3531       jend = ii[i+1] - ii[i];
3532       olen = 0;
3533       dlen = 0;
3534       for (j=0; j<jend; j++) {
3535         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3536         else dlen++;
3537         jj++;
3538       }
3539       olens[i] = olen;
3540       dlens[i] = dlen;
3541     }
3542 
3543     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3544     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3545 
3546     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3547     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3548     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3549     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3550     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3551     ierr = PetscFree(dlens);CHKERRQ(ierr);
3552 
3553   } else { /* call == MAT_REUSE_MATRIX */
3554     M    = *newmat;
3555     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3556     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3557     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3558     /*
3559          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3560        rather than the slower MatSetValues().
3561     */
3562     M->was_assembled = PETSC_TRUE;
3563     M->assembled     = PETSC_FALSE;
3564   }
3565 
3566   /* (5) Set values of Msub to *newmat */
3567   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3568   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3569 
3570   jj   = aij->j;
3571   aa   = aij->a;
3572   for (i=0; i<m; i++) {
3573     row = rstart + i;
3574     nz  = ii[i+1] - ii[i];
3575     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3576     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3577     jj += nz; aa += nz;
3578   }
3579   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3580 
3581   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3582   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3583 
3584   ierr = PetscFree(colsub);CHKERRQ(ierr);
3585 
3586   /* save Msub, iscol_sub and iscmap used in processor for next request */
3587   if (call ==  MAT_INITIAL_MATRIX) {
3588     *newmat = M;
3589     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3590     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3591 
3592     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3593     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3594 
3595     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3596     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3597 
3598     if (iscol_local) {
3599       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3600       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3601     }
3602   }
3603   PetscFunctionReturn(0);
3604 }
3605 
3606 /*
3607     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3608   on each process, and then the final result obtained by concatenating the local matrices.
3609   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3610 
3611   Note: This requires a sequential iscol with all indices.
3612 */
3613 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3614 {
3615   PetscErrorCode ierr;
3616   PetscMPIInt    rank,size;
3617   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3618   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3619   Mat            M,Mreuse;
3620   MatScalar      *aa,*vwork;
3621   MPI_Comm       comm;
3622   Mat_SeqAIJ     *aij;
3623   PetscBool      colflag,allcolumns=PETSC_FALSE;
3624 
3625   PetscFunctionBegin;
3626   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3627   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3628   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3629 
3630   /* Check for special case: each processor gets entire matrix columns */
3631   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3632   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3633   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3634 
3635   if (call ==  MAT_REUSE_MATRIX) {
3636     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3637     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3638     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3639   } else {
3640     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3641   }
3642 
3643   /*
3644       m - number of local rows
3645       n - number of columns (same on all processors)
3646       rstart - first row in new global matrix generated
3647   */
3648   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3649   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3650   if (call == MAT_INITIAL_MATRIX) {
3651     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3652     ii  = aij->i;
3653     jj  = aij->j;
3654 
3655     /*
3656         Determine the number of non-zeros in the diagonal and off-diagonal
3657         portions of the matrix in order to do correct preallocation
3658     */
3659 
3660     /* first get start and end of "diagonal" columns */
3661     if (csize == PETSC_DECIDE) {
3662       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3663       if (mglobal == n) { /* square matrix */
3664         nlocal = m;
3665       } else {
3666         nlocal = n/size + ((n % size) > rank);
3667       }
3668     } else {
3669       nlocal = csize;
3670     }
3671     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3672     rstart = rend - nlocal;
3673     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3674 
3675     /* next, compute all the lengths */
3676     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3677     olens = dlens + m;
3678     for (i=0; i<m; i++) {
3679       jend = ii[i+1] - ii[i];
3680       olen = 0;
3681       dlen = 0;
3682       for (j=0; j<jend; j++) {
3683         if (*jj < rstart || *jj >= rend) olen++;
3684         else dlen++;
3685         jj++;
3686       }
3687       olens[i] = olen;
3688       dlens[i] = dlen;
3689     }
3690     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3691     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3692     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3693     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3694     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3695     ierr = PetscFree(dlens);CHKERRQ(ierr);
3696   } else {
3697     PetscInt ml,nl;
3698 
3699     M    = *newmat;
3700     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3701     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3702     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3703     /*
3704          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3705        rather than the slower MatSetValues().
3706     */
3707     M->was_assembled = PETSC_TRUE;
3708     M->assembled     = PETSC_FALSE;
3709   }
3710   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3711   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3712   ii   = aij->i;
3713   jj   = aij->j;
3714   aa   = aij->a;
3715   for (i=0; i<m; i++) {
3716     row   = rstart + i;
3717     nz    = ii[i+1] - ii[i];
3718     cwork = jj;     jj += nz;
3719     vwork = aa;     aa += nz;
3720     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3721   }
3722 
3723   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3724   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3725   *newmat = M;
3726 
3727   /* save submatrix used in processor for next request */
3728   if (call ==  MAT_INITIAL_MATRIX) {
3729     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3730     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3731   }
3732   PetscFunctionReturn(0);
3733 }
3734 
3735 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3736 {
3737   PetscInt       m,cstart, cend,j,nnz,i,d;
3738   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3739   const PetscInt *JJ;
3740   PetscScalar    *values;
3741   PetscErrorCode ierr;
3742   PetscBool      nooffprocentries;
3743 
3744   PetscFunctionBegin;
3745   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3746 
3747   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3748   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3749   m      = B->rmap->n;
3750   cstart = B->cmap->rstart;
3751   cend   = B->cmap->rend;
3752   rstart = B->rmap->rstart;
3753 
3754   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3755 
3756 #if defined(PETSC_USE_DEBUG)
3757   for (i=0; i<m; i++) {
3758     nnz = Ii[i+1]- Ii[i];
3759     JJ  = J + Ii[i];
3760     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3761     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3762     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3763   }
3764 #endif
3765 
3766   for (i=0; i<m; i++) {
3767     nnz     = Ii[i+1]- Ii[i];
3768     JJ      = J + Ii[i];
3769     nnz_max = PetscMax(nnz_max,nnz);
3770     d       = 0;
3771     for (j=0; j<nnz; j++) {
3772       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3773     }
3774     d_nnz[i] = d;
3775     o_nnz[i] = nnz - d;
3776   }
3777   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3778   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3779 
3780   if (v) values = (PetscScalar*)v;
3781   else {
3782     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3783   }
3784 
3785   for (i=0; i<m; i++) {
3786     ii   = i + rstart;
3787     nnz  = Ii[i+1]- Ii[i];
3788     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3789   }
3790   nooffprocentries    = B->nooffprocentries;
3791   B->nooffprocentries = PETSC_TRUE;
3792   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3793   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3794   B->nooffprocentries = nooffprocentries;
3795 
3796   if (!v) {
3797     ierr = PetscFree(values);CHKERRQ(ierr);
3798   }
3799   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3800   PetscFunctionReturn(0);
3801 }
3802 
3803 /*@
3804    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3805    (the default parallel PETSc format).
3806 
3807    Collective on MPI_Comm
3808 
3809    Input Parameters:
3810 +  B - the matrix
3811 .  i - the indices into j for the start of each local row (starts with zero)
3812 .  j - the column indices for each local row (starts with zero)
3813 -  v - optional values in the matrix
3814 
3815    Level: developer
3816 
3817    Notes:
3818        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3819      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3820      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3821 
3822        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3823 
3824        The format used for the sparse matrix input is equivalent to a
3825     row-major ordering, i.e., for the following matrix, the input data expected is
3826     as shown below:
3827 
3828 $        1 0 0
3829 $        2 0 3     P0
3830 $       -------
3831 $        4 5 6     P1
3832 $
3833 $     Process0 [P0]: rows_owned=[0,1]
3834 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3835 $        j =  {0,0,2}  [size = 3]
3836 $        v =  {1,2,3}  [size = 3]
3837 $
3838 $     Process1 [P1]: rows_owned=[2]
3839 $        i =  {0,3}    [size = nrow+1  = 1+1]
3840 $        j =  {0,1,2}  [size = 3]
3841 $        v =  {4,5,6}  [size = 3]
3842 
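   A minimal calling sketch (csr_i, csr_j, and csr_v are hypothetical arrays assumed to already
   hold this process's rows in the layout shown above):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,csr_i,csr_j,csr_v);
.ve
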
3843 .keywords: matrix, aij, compressed row, sparse, parallel
3844 
3845 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3846           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3847 @*/
3848 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3849 {
3850   PetscErrorCode ierr;
3851 
3852   PetscFunctionBegin;
3853   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 /*@C
3858    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3859    (the default parallel PETSc format).  For good matrix assembly performance
3860    the user should preallocate the matrix storage by setting the parameters
3861    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3862    performance can be increased by more than a factor of 50.
3863 
3864    Collective on MPI_Comm
3865 
3866    Input Parameters:
3867 +  B - the matrix
3868 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3869            (same value is used for all local rows)
3870 .  d_nnz - array containing the number of nonzeros in the various rows of the
3871            DIAGONAL portion of the local submatrix (possibly different for each row)
3872            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3873            The size of this array is equal to the number of local rows, i.e 'm'.
3874            For matrices that will be factored, you must leave room for (and set)
3875            the diagonal entry even if it is zero.
3876 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3877            submatrix (same value is used for all local rows).
3878 -  o_nnz - array containing the number of nonzeros in the various rows of the
3879            OFF-DIAGONAL portion of the local submatrix (possibly different for
3880            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3881            structure. The size of this array is equal to the number
3882            of local rows, i.e 'm'.
3883 
3884    If the *_nnz parameter is given then the *_nz parameter is ignored
3885 
3886    The AIJ format (also called the Yale sparse matrix format or
3887    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3888    storage.  The stored row and column indices begin with zero.
3889    See Users-Manual: ch_mat for details.
3890 
3891    The parallel matrix is partitioned such that the first m0 rows belong to
3892    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3893    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3894 
3895    The DIAGONAL portion of the local submatrix of a processor can be defined
3896    as the submatrix which is obtained by extracting the part corresponding to
3897    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3898    first row that belongs to the processor, r2 is the last row belonging to
3899    this processor, and c1-c2 is the range of indices of the local part of a
3900    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3901    common case of a square matrix, the row and column ranges are the same and
3902    the DIAGONAL part is also square. The remaining portion of the local
3903    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3904 
3905    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3906 
3907    You can call MatGetInfo() to get information on how effective the preallocation was,
3908    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3909    You can also run with the option -info and look for messages with the string
3910    malloc in them to see if additional memory allocation was needed.
3911 
3912    Example usage:
3913 
3914    Consider the following 8x8 matrix with 34 non-zero values that is
3915    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3916    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3917    as follows:
3918 
3919 .vb
3920             1  2  0  |  0  3  0  |  0  4
3921     Proc0   0  5  6  |  7  0  0  |  8  0
3922             9  0 10  | 11  0  0  | 12  0
3923     -------------------------------------
3924            13  0 14  | 15 16 17  |  0  0
3925     Proc1   0 18  0  | 19 20 21  |  0  0
3926             0  0  0  | 22 23  0  | 24  0
3927     -------------------------------------
3928     Proc2  25 26 27  |  0  0 28  | 29  0
3929            30  0  0  | 31 32 33  |  0 34
3930 .ve
3931 
3932    This can be represented as a collection of submatrices as:
3933 
3934 .vb
3935       A B C
3936       D E F
3937       G H I
3938 .ve
3939 
3940    Where the submatrices A,B,C are owned by proc0, D,E,F are
3941    owned by proc1, G,H,I are owned by proc2.
3942 
3943    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3944    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3945    The 'M','N' parameters are 8,8, and have the same values on all procs.
3946 
3947    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3948    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3949    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3950    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3951    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3952    matrix, and [DF] as another SeqAIJ matrix.
3953 
3954    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3955    allocated for every row of the local DIAGONAL submatrix, and o_nz
3956    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3957    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3958    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3959    In this case, the values of d_nz,o_nz are:
3960 .vb
3961      proc0 : d_nz = 2, o_nz = 2
3962      proc1 : d_nz = 3, o_nz = 2
3963      proc2 : d_nz = 1, o_nz = 4
3964 .ve
3965    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3966    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3967    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3968    34 values.
3969 
3970    When d_nnz, o_nnz parameters are specified, the storage is specified
3971    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3972    In the above case the values for d_nnz,o_nnz are:
3973 .vb
3974      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3975      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3976      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3977 .ve
3978    Here the space allocated is the sum of all the above values, i.e., 34, and
3979    hence the preallocation is perfect.
3980 
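   A minimal preallocation sketch (the counts 5 and 2 below are illustrative assumptions, not
   taken from the example above; they say every local row has at most 5 nonzeros in the
   DIAGONAL block and at most 2 in the OFF-DIAGONAL block):

.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
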
3981    Level: intermediate
3982 
3983 .keywords: matrix, aij, compressed row, sparse, parallel
3984 
3985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3986           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3987 @*/
3988 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3989 {
3990   PetscErrorCode ierr;
3991 
3992   PetscFunctionBegin;
3993   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3994   PetscValidType(B,1);
3995   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3996   PetscFunctionReturn(0);
3997 }
3998 
3999 /*@
4000      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4001          rows in standard CSR format.
4002 
4003    Collective on MPI_Comm
4004 
4005    Input Parameters:
4006 +  comm - MPI communicator
4007 .  m - number of local rows (Cannot be PETSC_DECIDE)
4008 .  n - This value should be the same as the local size used in creating the
4009        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4010        calculated if N is given). For square matrices n is almost always m.
4011 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4012 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4013 .   i - row indices
4014 .   j - column indices
4015 -   a - matrix values
4016 
4017    Output Parameter:
4018 .   mat - the matrix
4019 
4020    Level: intermediate
4021 
4022    Notes:
4023        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4024      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4025      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4026 
4027        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4028 
4029        The format used for the sparse matrix input is equivalent to a
4030     row-major ordering, i.e., for the following matrix, the input data expected is
4031     as shown below:
4032 
4033 $        1 0 0
4034 $        2 0 3     P0
4035 $       -------
4036 $        4 5 6     P1
4037 $
4038 $     Process0 [P0]: rows_owned=[0,1]
4039 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4040 $        j =  {0,0,2}  [size = 3]
4041 $        v =  {1,2,3}  [size = 3]
4042 $
4043 $     Process1 [P1]: rows_owned=[2]
4044 $        i =  {0,3}    [size = nrow+1  = 1+1]
4045 $        j =  {0,1,2}  [size = 3]
4046 $        v =  {4,5,6}  [size = 3]
4047 
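   A minimal calling sketch (the arrays i, j, and a are assumed to already hold this process's
   rows in the layout shown above):

.vb
     Mat mat;
     MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
     /* ... use mat ... */
     MatDestroy(&mat);
.ve
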
4048 .keywords: matrix, aij, compressed row, sparse, parallel
4049 
4050 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4051           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4052 @*/
4053 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4054 {
4055   PetscErrorCode ierr;
4056 
4057   PetscFunctionBegin;
4058   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4059   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4060   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4061   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4062   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4063   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4064   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4065   PetscFunctionReturn(0);
4066 }
4067 
4068 /*@C
4069    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4070    (the default parallel PETSc format).  For good matrix assembly performance
4071    the user should preallocate the matrix storage by setting the parameters
4072    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4073    performance can be increased by more than a factor of 50.
4074 
4075    Collective on MPI_Comm
4076 
4077    Input Parameters:
4078 +  comm - MPI communicator
4079 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4080            This value should be the same as the local size used in creating the
4081            y vector for the matrix-vector product y = Ax.
4082 .  n - This value should be the same as the local size used in creating the
4083        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4084        calculated if N is given). For square matrices n is almost always m.
4085 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4086 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4087 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4088            (same value is used for all local rows)
4089 .  d_nnz - array containing the number of nonzeros in the various rows of the
4090            DIAGONAL portion of the local submatrix (possibly different for each row)
4091            or NULL, if d_nz is used to specify the nonzero structure.
4092            The size of this array is equal to the number of local rows, i.e 'm'.
4093 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4094            submatrix (same value is used for all local rows).
4095 -  o_nnz - array containing the number of nonzeros in the various rows of the
4096            OFF-DIAGONAL portion of the local submatrix (possibly different for
4097            each row) or NULL, if o_nz is used to specify the nonzero
4098            structure. The size of this array is equal to the number
4099            of local rows, i.e 'm'.
4100 
4101    Output Parameter:
4102 .  A - the matrix
4103 
4104    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4105    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4106    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4107 
4108    Notes:
4109    If the *_nnz parameter is given then the *_nz parameter is ignored
4110 
4111    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4112    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4113    storage requirements for this matrix.
4114 
4115    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4116    processor then it must be used on all processors that share the object for
4117    that argument.
4118 
4119    The user MUST specify either the local or global matrix dimensions
4120    (possibly both).
4121 
4122    The parallel matrix is partitioned across processors such that the
4123    first m0 rows belong to process 0, the next m1 rows belong to
4124    process 1, the next m2 rows belong to process 2, etc., where
4125    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4126    values corresponding to an [m x N] submatrix.
4127 
4128    The columns are logically partitioned with the n0 columns belonging
4129    to the 0th partition, the next n1 columns belonging to the next
4130    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4131 
4132    The DIAGONAL portion of the local submatrix on any given processor
4133    is the submatrix corresponding to the rows and columns m,n
4134    corresponding to the given processor, i.e., the diagonal matrix on
4135    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4136    etc. The remaining portion of the local submatrix [m x (N-n)]
4137    constitutes the OFF-DIAGONAL portion. The example below better
4138    illustrates this concept.
4139 
4140    For a square global matrix we define each processor's diagonal portion
4141    to be its local rows and the corresponding columns (a square submatrix);
4142    each processor's off-diagonal portion encompasses the remainder of the
4143    local matrix (a rectangular submatrix).
4144 
4145    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4146 
4147    When calling this routine with a single process communicator, a matrix of
4148    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4149    type of communicator, use the construction mechanism
4150 .vb
4151      MatCreate(...,&A);
4152      MatSetType(A,MATMPIAIJ);
4153      MatSetSizes(A, m,n,M,N);
4154      MatMPIAIJSetPreallocation(A,...);
4155 .ve
4158 
4159    By default, this format uses inodes (identical nodes) when possible.
4160    We search for consecutive rows with the same nonzero structure, thereby
4161    reusing matrix information to achieve increased efficiency.
4162 
4163    Options Database Keys:
4164 +  -mat_no_inode  - Do not use inodes
4165 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4166 
4167 
4168 
4169    Example usage:
4170 
4171    Consider the following 8x8 matrix with 34 non-zero values that is
4172    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4173    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4174    as follows:
4175 
4176 .vb
4177             1  2  0  |  0  3  0  |  0  4
4178     Proc0   0  5  6  |  7  0  0  |  8  0
4179             9  0 10  | 11  0  0  | 12  0
4180     -------------------------------------
4181            13  0 14  | 15 16 17  |  0  0
4182     Proc1   0 18  0  | 19 20 21  |  0  0
4183             0  0  0  | 22 23  0  | 24  0
4184     -------------------------------------
4185     Proc2  25 26 27  |  0  0 28  | 29  0
4186            30  0  0  | 31 32 33  |  0 34
4187 .ve
4188 
4189    This can be represented as a collection of submatrices as
4190 
4191 .vb
4192       A B C
4193       D E F
4194       G H I
4195 .ve
4196 
4197    Where the submatrices A,B,C are owned by proc0, D,E,F are
4198    owned by proc1, G,H,I are owned by proc2.
4199 
4200    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4201    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4202    The 'M','N' parameters are 8,8, and have the same values on all procs.
4203 
4204    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4205    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4206    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4207    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4208    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4209    matrix, and [DF] as another SeqAIJ matrix.
4210 
4211    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4212    allocated for every row of the local DIAGONAL submatrix, and o_nz
4213    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4214    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4215    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4216    In this case, the values of d_nz,o_nz are
4217 .vb
4218      proc0 : d_nz = 2, o_nz = 2
4219      proc1 : d_nz = 3, o_nz = 2
4220      proc2 : d_nz = 1, o_nz = 4
4221 .ve
4222    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4223    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4224    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4225    34 values.
4226 
4227    When d_nnz, o_nnz parameters are specified, the storage is specified
4228    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4229    In the above case the values for d_nnz,o_nnz are
4230 .vb
4231      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4232      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4233      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4234 .ve
4235    Here the space allocated is the sum of all the above values, i.e., 34, and
4236    hence the preallocation is perfect.
4237 
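   A minimal calling sketch for the example above (d_nnz and o_nnz are assumed to be filled with
   this process's per-row counts; m is 3, 3, or 2 depending on the process):

.vb
     Mat A;
     MatCreateAIJ(comm,m,m,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
     MatDestroy(&A);
.ve
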
4238    Level: intermediate
4239 
4240 .keywords: matrix, aij, compressed row, sparse, parallel
4241 
4242 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4243           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4244 @*/
4245 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4246 {
4247   PetscErrorCode ierr;
4248   PetscMPIInt    size;
4249 
4250   PetscFunctionBegin;
4251   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4252   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4254   if (size > 1) {
4255     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4256     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4257   } else {
4258     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4259     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4260   }
4261   PetscFunctionReturn(0);
4262 }
4263 
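/*
   MatMPIAIJGetSeqAIJ - returns the local DIAGONAL block (Ad), the local OFF-DIAGONAL block (Ao),
   and the map (colmap) from Ao's compressed local column numbering to global column indices.
   The returned objects are the matrices stored internally by the MATMPIAIJ format, not copies.
*/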
4264 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4265 {
4266   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4267   PetscBool      flg;
4268   PetscErrorCode ierr;
4269 
4270   PetscFunctionBegin;
4271   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4272   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4273   if (Ad)     *Ad     = a->A;
4274   if (Ao)     *Ao     = a->B;
4275   if (colmap) *colmap = a->garray;
4276   PetscFunctionReturn(0);
4277 }
4278 
4279 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4280 {
4281   PetscErrorCode ierr;
4282   PetscInt       m,N,i,rstart,nnz,Ii;
4283   PetscInt       *indx;
4284   PetscScalar    *values;
4285 
4286   PetscFunctionBegin;
4287   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4288   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4289     PetscInt       *dnz,*onz,sum,bs,cbs;
4290 
4291     if (n == PETSC_DECIDE) {
4292       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4293     }
4294     /* Check sum(n) = N */
4295     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4296     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4297 
4298     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4299     rstart -= m;
4300 
4301     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4302     for (i=0; i<m; i++) {
4303       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4304       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4305       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4306     }
4307 
4308     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4309     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4310     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4311     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4312     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4313     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4314     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4315     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4316   }
4317 
4318   /* numeric phase */
4319   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4320   for (i=0; i<m; i++) {
4321     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4322     Ii   = i + rstart;
4323     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4324     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4325   }
4326   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4327   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4332 {
4333   PetscErrorCode    ierr;
4334   PetscMPIInt       rank;
4335   PetscInt          m,N,i,rstart,nnz;
4336   size_t            len;
4337   const PetscInt    *indx;
4338   PetscViewer       out;
4339   char              *name;
4340   Mat               B;
4341   const PetscScalar *values;
4342 
4343   PetscFunctionBegin;
4344   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4345   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4346   /* Should this be the type of the diagonal block of A? */
4347   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4348   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4349   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4350   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4351   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4352   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4353   for (i=0; i<m; i++) {
4354     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4355     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4356     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4357   }
4358   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4359   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4360 
4361   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4362   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4363   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4364   sprintf(name,"%s.%d",outfile,rank);
4365   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4366   ierr = PetscFree(name);CHKERRQ(ierr);
4367   ierr = MatView(B,out);CHKERRQ(ierr);
4368   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4369   ierr = MatDestroy(&B);CHKERRQ(ierr);
4370   PetscFunctionReturn(0);
4371 }
4372 
4373 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4374 {
4375   PetscErrorCode      ierr;
4376   Mat_Merge_SeqsToMPI *merge;
4377   PetscContainer      container;
4378 
4379   PetscFunctionBegin;
4380   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4381   if (container) {
4382     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4383     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4384     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4385     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4386     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4387     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4388     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4389     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4390     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4391     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4392     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4393     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4394     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4395     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4396     ierr = PetscFree(merge);CHKERRQ(ierr);
4397     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4398   }
4399   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4400   PetscFunctionReturn(0);
4401 }
4402 
4403 #include <../src/mat/utils/freespace.h>
4404 #include <petscbt.h>
4405 
4406 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4407 {
4408   PetscErrorCode      ierr;
4409   MPI_Comm            comm;
4410   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4411   PetscMPIInt         size,rank,taga,*len_s;
4412   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4413   PetscInt            proc,m;
4414   PetscInt            **buf_ri,**buf_rj;
4415   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4416   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4417   MPI_Request         *s_waits,*r_waits;
4418   MPI_Status          *status;
4419   MatScalar           *aa=a->a;
4420   MatScalar           **abuf_r,*ba_i;
4421   Mat_Merge_SeqsToMPI *merge;
4422   PetscContainer      container;
4423 
4424   PetscFunctionBegin;
4425   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4426   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4427 
4428   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4429   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4430 
4431   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4432   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4433 
4434   bi     = merge->bi;
4435   bj     = merge->bj;
4436   buf_ri = merge->buf_ri;
4437   buf_rj = merge->buf_rj;
4438 
4439   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4440   owners = merge->rowmap->range;
4441   len_s  = merge->len_s;
4442 
4443   /* send and recv matrix values */
4444   /*-----------------------------*/
4445   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4446   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4447 
4448   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4449   for (proc=0,k=0; proc<size; proc++) {
4450     if (!len_s[proc]) continue;
4451     i    = owners[proc];
4452     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4453     k++;
4454   }
4455 
4456   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4457   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4458   ierr = PetscFree(status);CHKERRQ(ierr);
4459 
4460   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4461   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4462 
4463   /* insert mat values of mpimat */
4464   /*----------------------------*/
4465   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4466   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4467 
4468   for (k=0; k<merge->nrecv; k++) {
4469     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4470     nrows       = *(buf_ri_k[k]);
4471     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4472     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure  */
4473   }
4474 
4475   /* set values of ba */
4476   m = merge->rowmap->n;
4477   for (i=0; i<m; i++) {
4478     arow = owners[rank] + i;
4479     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4480     bnzi = bi[i+1] - bi[i];
4481     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4482 
4483     /* add local non-zero vals of this proc's seqmat into ba */
4484     anzi   = ai[arow+1] - ai[arow];
4485     aj     = a->j + ai[arow];
4486     aa     = a->a + ai[arow];
4487     nextaj = 0;
4488     for (j=0; nextaj<anzi; j++) {
4489       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4490         ba_i[j] += aa[nextaj++];
4491       }
4492     }
4493 
4494     /* add received vals into ba */
4495     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4496       /* i-th row */
4497       if (i == *nextrow[k]) {
4498         anzi   = *(nextai[k]+1) - *nextai[k];
4499         aj     = buf_rj[k] + *(nextai[k]);
4500         aa     = abuf_r[k] + *(nextai[k]);
4501         nextaj = 0;
4502         for (j=0; nextaj<anzi; j++) {
4503           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4504             ba_i[j] += aa[nextaj++];
4505           }
4506         }
4507         nextrow[k]++; nextai[k]++;
4508       }
4509     }
4510     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4511   }
4512   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4513   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4514 
4515   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4516   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4517   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4518   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4519   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4520   PetscFunctionReturn(0);
4521 }
4522 
4523 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4524 {
4525   PetscErrorCode      ierr;
4526   Mat                 B_mpi;
4527   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4528   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4529   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4530   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4531   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4532   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4533   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4534   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4535   MPI_Status          *status;
4536   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4537   PetscBT             lnkbt;
4538   Mat_Merge_SeqsToMPI *merge;
4539   PetscContainer      container;
4540 
4541   PetscFunctionBegin;
4542   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4543 
4544   /* make sure it is a PETSc comm */
4545   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4546   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4547   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4548 
4549   ierr = PetscNew(&merge);CHKERRQ(ierr);
4550   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4551 
4552   /* determine row ownership */
4553   /*---------------------------------------------------------*/
4554   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4555   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4556   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4557   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4558   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4559   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4560   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4561 
4562   m      = merge->rowmap->n;
4563   owners = merge->rowmap->range;
4564 
4565   /* determine the number of messages to send, their lengths */
4566   /*---------------------------------------------------------*/
4567   len_s = merge->len_s;
4568 
4569   len          = 0; /* length of buf_si[] */
4570   merge->nsend = 0;
4571   for (proc=0; proc<size; proc++) {
4572     len_si[proc] = 0;
4573     if (proc == rank) {
4574       len_s[proc] = 0;
4575     } else {
4576       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4577       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4578     }
4579     if (len_s[proc]) {
4580       merge->nsend++;
4581       nrows = 0;
4582       for (i=owners[proc]; i<owners[proc+1]; i++) {
4583         if (ai[i+1] > ai[i]) nrows++;
4584       }
4585       len_si[proc] = 2*(nrows+1);
4586       len         += len_si[proc];
4587     }
4588   }
4589 
4590   /* determine the number and length of messages to receive for ij-structure */
4591   /*-------------------------------------------------------------------------*/
4592   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4593   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4594 
4595   /* post the Irecv of j-structure */
4596   /*-------------------------------*/
4597   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4598   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4599 
4600   /* post the Isend of j-structure */
4601   /*--------------------------------*/
4602   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4603 
4604   for (proc=0, k=0; proc<size; proc++) {
4605     if (!len_s[proc]) continue;
4606     i    = owners[proc];
4607     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4608     k++;
4609   }
4610 
4611   /* receives and sends of j-structure are complete */
4612   /*------------------------------------------------*/
4613   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4614   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4615 
4616   /* send and recv i-structure */
4617   /*---------------------------*/
4618   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4619   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4620 
4621   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4622   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4623   for (proc=0,k=0; proc<size; proc++) {
4624     if (!len_s[proc]) continue;
4625     /* form outgoing message for i-structure:
4626          buf_si[0]:                 nrows to be sent
4627                [1:nrows]:           row index (global)
4628                [nrows+1:2*nrows+1]: i-structure index
4629     */
4630     /*-------------------------------------------*/
4631     nrows       = len_si[proc]/2 - 1;
4632     buf_si_i    = buf_si + nrows+1;
4633     buf_si[0]   = nrows;
4634     buf_si_i[0] = 0;
4635     nrows       = 0;
4636     for (i=owners[proc]; i<owners[proc+1]; i++) {
4637       anzi = ai[i+1] - ai[i];
4638       if (anzi) {
4639         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4640         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4641         nrows++;
4642       }
4643     }
4644     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4645     k++;
4646     buf_si += len_si[proc];
4647   }
4648 
4649   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4650   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4651 
4652   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4653   for (i=0; i<merge->nrecv; i++) {
4654     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4655   }
4656 
4657   ierr = PetscFree(len_si);CHKERRQ(ierr);
4658   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4659   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4660   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4661   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4662   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4663   ierr = PetscFree(status);CHKERRQ(ierr);
4664 
4665   /* compute a local seq matrix in each processor */
4666   /*----------------------------------------------*/
4667   /* allocate bi array and free space for accumulating nonzero column info */
4668   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4669   bi[0] = 0;
4670 
4671   /* create and initialize a linked list */
4672   nlnk = N+1;
4673   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4674 
4675   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4676   len  = ai[owners[rank+1]] - ai[owners[rank]];
4677   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4678 
4679   current_space = free_space;
4680 
4681   /* determine symbolic info for each local row */
4682   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4683 
4684   for (k=0; k<merge->nrecv; k++) {
4685     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4686     nrows       = *buf_ri_k[k];
4687     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4688     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure  */
4689   }
4690 
4691   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4692   len  = 0;
4693   for (i=0; i<m; i++) {
4694     bnzi = 0;
4695     /* add local non-zero cols of this proc's seqmat into lnk */
4696     arow  = owners[rank] + i;
4697     anzi  = ai[arow+1] - ai[arow];
4698     aj    = a->j + ai[arow];
4699     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4700     bnzi += nlnk;
4701     /* add received col data into lnk */
4702     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4703       if (i == *nextrow[k]) { /* i-th row */
4704         anzi  = *(nextai[k]+1) - *nextai[k];
4705         aj    = buf_rj[k] + *nextai[k];
4706         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4707         bnzi += nlnk;
4708         nextrow[k]++; nextai[k]++;
4709       }
4710     }
4711     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4712 
4713     /* if free space is not available, make more free space */
4714     if (current_space->local_remaining<bnzi) {
4715       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4716       nspacedouble++;
4717     }
4718     /* copy data into free space, then initialize lnk */
4719     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4720     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4721 
4722     current_space->array           += bnzi;
4723     current_space->local_used      += bnzi;
4724     current_space->local_remaining -= bnzi;
4725 
4726     bi[i+1] = bi[i] + bnzi;
4727   }
4728 
4729   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4730 
4731   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4732   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4733   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4734 
4735   /* create symbolic parallel matrix B_mpi */
4736   /*---------------------------------------*/
4737   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4738   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4739   if (n==PETSC_DECIDE) {
4740     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4741   } else {
4742     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4743   }
4744   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4745   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4746   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4747   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4748   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4749 
4750   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4751   B_mpi->assembled    = PETSC_FALSE;
4752   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4753   merge->bi           = bi;
4754   merge->bj           = bj;
4755   merge->buf_ri       = buf_ri;
4756   merge->buf_rj       = buf_rj;
4757   merge->coi          = NULL;
4758   merge->coj          = NULL;
4759   merge->owners_co    = NULL;
4760 
4761   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4762 
4763   /* attach the supporting struct to B_mpi for reuse */
4764   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4765   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4766   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4767   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4768   *mpimat = B_mpi;
4769 
4770   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4771   PetscFunctionReturn(0);
4772 }
4773 
4774 /*@C
4775       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4776                  matrices from each processor
4777 
4778     Collective on MPI_Comm
4779 
4780    Input Parameters:
4781 +    comm - the communicator the parallel matrix will live on
4782 .    seqmat - the input sequential matrix (one per process)
4783 .    m - number of local rows (or PETSC_DECIDE)
4784 .    n - number of local columns (or PETSC_DECIDE)
4785 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4786 
4787    Output Parameter:
4788 .    mpimat - the parallel matrix generated
4789 
4790     Level: advanced
4791 
4792    Notes:
4793      The dimensions of the sequential matrix in each processor MUST be the same.
4794      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4795      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
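
     A minimal usage sketch (seqmat is assumed to be an already assembled MATSEQAIJ matrix of
     identical dimensions on every process):

.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* if only the values (not the nonzero pattern) of seqmat change, the sum can be redone in place */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve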
4796 @*/
4797 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4798 {
4799   PetscErrorCode ierr;
4800   PetscMPIInt    size;
4801 
4802   PetscFunctionBegin;
4803   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4804   if (size == 1) {
4805     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4806     if (scall == MAT_INITIAL_MATRIX) {
4807       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4808     } else {
4809       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4810     }
4811     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4812     PetscFunctionReturn(0);
4813   }
4814   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4815   if (scall == MAT_INITIAL_MATRIX) {
4816     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4817   }
4818   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4819   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4820   PetscFunctionReturn(0);
4821 }
4822 
4823 /*@
4824      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4825           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4826           with MatGetSize().
4827 
4828     Not Collective
4829 
4830    Input Parameters:
4831 +    A - the matrix
4832 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4833 
4834    Output Parameter:
4835 .    A_loc - the local sequential matrix generated
4836 
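   Notes:
     A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* if only the values of A change, the local copy can be refreshed in place */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
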
4837     Level: developer
4838 
4839 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4840 
4841 @*/
4842 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4843 {
4844   PetscErrorCode ierr;
4845   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4846   Mat_SeqAIJ     *mat,*a,*b;
4847   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4848   MatScalar      *aa,*ba,*cam;
4849   PetscScalar    *ca;
4850   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4851   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4852   PetscBool      match;
4853   MPI_Comm       comm;
4854   PetscMPIInt    size;
4855 
4856   PetscFunctionBegin;
4857   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4858   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4859   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4860   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4861   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4862 
4863   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4864   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4865   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4866   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4867   aa = a->a; ba = b->a;
4868   if (scall == MAT_INITIAL_MATRIX) {
4869     if (size == 1) {
4870       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4871       PetscFunctionReturn(0);
4872     }
4873 
4874     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4875     ci[0] = 0;
4876     for (i=0; i<am; i++) {
4877       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4878     }
4879     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4880     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4881     k    = 0;
4882     for (i=0; i<am; i++) {
4883       ncols_o = bi[i+1] - bi[i];
4884       ncols_d = ai[i+1] - ai[i];
4885       /* off-diagonal portion of A */
4886       for (jo=0; jo<ncols_o; jo++) {
4887         col = cmap[*bj];
4888         if (col >= cstart) break;
4889         cj[k]   = col; bj++;
4890         ca[k++] = *ba++;
4891       }
4892       /* diagonal portion of A */
4893       for (j=0; j<ncols_d; j++) {
4894         cj[k]   = cstart + *aj++;
4895         ca[k++] = *aa++;
4896       }
4897       /* off-diagonal portion of A */
4898       for (j=jo; j<ncols_o; j++) {
4899         cj[k]   = cmap[*bj++];
4900         ca[k++] = *ba++;
4901       }
4902     }
4903     /* put together the new matrix */
4904     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4905     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4906     /* Since these are PETSc arrays, change flags to free them as necessary. */
4907     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4908     mat->free_a  = PETSC_TRUE;
4909     mat->free_ij = PETSC_TRUE;
4910     mat->nonew   = 0;
4911   } else if (scall == MAT_REUSE_MATRIX) {
4912     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4913     ci = mat->i; cj = mat->j; cam = mat->a;
4914     for (i=0; i<am; i++) {
4915       /* off-diagonal portion of A */
4916       ncols_o = bi[i+1] - bi[i];
4917       for (jo=0; jo<ncols_o; jo++) {
4918         col = cmap[*bj];
4919         if (col >= cstart) break;
4920         *cam++ = *ba++; bj++;
4921       }
4922       /* diagonal portion of A */
4923       ncols_d = ai[i+1] - ai[i];
4924       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4925       /* off-diagonal portion of A */
4926       for (j=jo; j<ncols_o; j++) {
4927         *cam++ = *ba++; bj++;
4928       }
4929     }
4930   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4931   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 /*@C
4936      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4937 
4938     Not Collective
4939 
4940    Input Parameters:
4941 +    A - the matrix
4942 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4943 -    row, col - index sets of rows and columns to extract (or NULL)
4944 
4945    Output Parameter:
4946 .    A_loc - the local sequential matrix generated
4947 
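   Notes:
     A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix; passing NULL for
     row and col selects all local rows and all nonzero columns):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
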
4948     Level: developer
4949 
4950 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4951 
4952 @*/
4953 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4954 {
4955   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4956   PetscErrorCode ierr;
4957   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4958   IS             isrowa,iscola;
4959   Mat            *aloc;
4960   PetscBool      match;
4961 
4962   PetscFunctionBegin;
4963   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4964   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4965   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4966   if (!row) {
4967     start = A->rmap->rstart; end = A->rmap->rend;
4968     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4969   } else {
4970     isrowa = *row;
4971   }
4972   if (!col) {
4973     start = A->cmap->rstart;
4974     cmap  = a->garray;
4975     nzA   = a->A->cmap->n;
4976     nzB   = a->B->cmap->n;
4977     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4978     ncols = 0;
4979     for (i=0; i<nzB; i++) {
4980       if (cmap[i] < start) idx[ncols++] = cmap[i];
4981       else break;
4982     }
4983     imark = i;
4984     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4985     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4986     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4987   } else {
4988     iscola = *col;
4989   }
4990   if (scall != MAT_INITIAL_MATRIX) {
4991     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4992     aloc[0] = *A_loc;
4993   }
4994   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4995   *A_loc = aloc[0];
4996   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4997   if (!row) {
4998     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4999   }
5000   if (!col) {
5001     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5002   }
5003   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
5006 
5007 /*@C
5008     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5009 
5010     Collective on Mat
5011 
5012    Input Parameters:
5013 +    A,B - the matrices in mpiaij format
5014 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5015 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5016 
5017    Output Parameter:
5018 +    rowb, colb - index sets of rows and columns of B to extract
5019 -    B_seq - the sequential matrix generated
5020 
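   Notes:
     A minimal usage sketch (A and B are assumed to be MATMPIAIJ matrices with compatible local
     sizes; the index sets and the sequential matrix created by the first call are reused by the
     second):

.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* after the values of B change, extract again reusing the saved index sets */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
     ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&B_seq);
.ve
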
5021     Level: developer
5022 
5023 @*/
5024 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5025 {
5026   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5027   PetscErrorCode ierr;
5028   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5029   IS             isrowb,iscolb;
5030   Mat            *bseq=NULL;
5031 
5032   PetscFunctionBegin;
5033   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5034     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5035   }
5036   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5037 
5038   if (scall == MAT_INITIAL_MATRIX) {
5039     start = A->cmap->rstart;
5040     cmap  = a->garray;
5041     nzA   = a->A->cmap->n;
5042     nzB   = a->B->cmap->n;
5043     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5044     ncols = 0;
5045     for (i=0; i<nzB; i++) {  /* row < local row index */
5046       if (cmap[i] < start) idx[ncols++] = cmap[i];
5047       else break;
5048     }
5049     imark = i;
5050     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5051     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5052     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5053     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5054   } else {
5055     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5056     isrowb  = *rowb; iscolb = *colb;
5057     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5058     bseq[0] = *B_seq;
5059   }
5060   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5061   *B_seq = bseq[0];
5062   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5063   if (!rowb) {
5064     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5065   } else {
5066     *rowb = isrowb;
5067   }
5068   if (!colb) {
5069     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5070   } else {
5071     *colb = iscolb;
5072   }
5073   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5074   PetscFunctionReturn(0);
5075 }
5076 
5077 /*
5078     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5079     of the OFF-DIAGONAL portion of the local part of A
5080 
5081     Collective on Mat
5082 
5083    Input Parameters:
5084 +    A,B - the matrices in mpiaij format
5085 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5086 
5087    Output Parameters:
5088 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5089 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5090 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5091 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5092 
5093     Level: developer
5094 
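   Example usage (a minimal sketch; the variable names are illustrative, error handling is abbreviated,
   and the cleanup assumes the caller requested the reuse data):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... the numerical values of B change ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
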
5095 */
5096 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5097 {
5098   VecScatter_MPI_General *gen_to,*gen_from;
5099   PetscErrorCode         ierr;
5100   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5101   Mat_SeqAIJ             *b_oth;
5102   VecScatter             ctx =a->Mvctx;
5103   MPI_Comm               comm;
5104   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5105   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5106   PetscInt               *rvalues,*svalues;
5107   MatScalar              *b_otha,*bufa,*bufA;
5108   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5109   MPI_Request            *rwaits = NULL,*swaits = NULL;
5110   MPI_Status             *sstatus,rstatus;
5111   PetscMPIInt            jj,size;
5112   PetscInt               *cols,sbs,rbs;
5113   PetscScalar            *vals;
5114 
5115   PetscFunctionBegin;
5116   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5117   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5118 
5119   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5120     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5121   }
5122   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5123   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5124 
5125   if (size == 1) {
5126     if (startsj_s) *startsj_s = NULL; /* no off-process part is needed on a single process */
5127     if (bufa_ptr)  *bufa_ptr  = NULL;
5128     *B_oth    = NULL;
5129     PetscFunctionReturn(0);
5130   }
5131 
5132   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5133   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5134   nrecvs   = gen_from->n;
5135   nsends   = gen_to->n;
5136 
5137   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5138   srow    = gen_to->indices;    /* local row index to be sent */
5139   sstarts = gen_to->starts;
5140   sprocs  = gen_to->procs;
5141   sstatus = gen_to->sstatus;
5142   sbs     = gen_to->bs;
5143   rstarts = gen_from->starts;
5144   rprocs  = gen_from->procs;
5145   rbs     = gen_from->bs;
5146 
5147   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5148   if (scall == MAT_INITIAL_MATRIX) {
5149     /* i-array */
5150     /*---------*/
5151     /*  post receives */
5152     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5153     for (i=0; i<nrecvs; i++) {
5154       rowlen = rvalues + rstarts[i]*rbs;
5155       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5156       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5157     }
5158 
5159     /* pack the outgoing message */
5160     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5161 
5162     sstartsj[0] = 0;
5163     rstartsj[0] = 0;
5164     len         = 0; /* total length of j or a array to be sent */
5165     k           = 0;
5166     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5167     for (i=0; i<nsends; i++) {
5168       rowlen = svalues + sstarts[i]*sbs;
5169       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5170       for (j=0; j<nrows; j++) {
5171         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5172         for (l=0; l<sbs; l++) {
5173           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5174 
5175           rowlen[j*sbs+l] = ncols;
5176 
5177           len += ncols;
5178           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5179         }
5180         k++;
5181       }
5182       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5183 
5184       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5185     }
5186     /* recvs and sends of i-array are completed */
5187     i = nrecvs;
5188     while (i--) {
5189       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5190     }
5191     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5192     ierr = PetscFree(svalues);CHKERRQ(ierr);
5193 
5194     /* allocate buffers for sending j and a arrays */
5195     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5196     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5197 
5198     /* create i-array of B_oth */
5199     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5200 
5201     b_othi[0] = 0;
5202     len       = 0; /* total length of j or a array to be received */
5203     k         = 0;
5204     for (i=0; i<nrecvs; i++) {
5205       rowlen = rvalues + rstarts[i]*rbs;
5206       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5207       for (j=0; j<nrows; j++) {
5208         b_othi[k+1] = b_othi[k] + rowlen[j];
5209         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5210         k++;
5211       }
5212       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5213     }
5214     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5215 
5216     /* allocate space for j and a arrays of B_oth */
5217     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5218     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5219 
5220     /* j-array */
5221     /*---------*/
5222     /*  post receives of j-array */
5223     for (i=0; i<nrecvs; i++) {
5224       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5225       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5226     }
5227 
5228     /* pack the outgoing message j-array */
5229     k = 0;
5230     for (i=0; i<nsends; i++) {
5231       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5232       bufJ  = bufj+sstartsj[i];
5233       for (j=0; j<nrows; j++) {
5234         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5235         for (ll=0; ll<sbs; ll++) {
5236           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5237           for (l=0; l<ncols; l++) {
5238             *bufJ++ = cols[l];
5239           }
5240           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5241         }
5242       }
5243       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5244     }
5245 
5246     /* recvs and sends of j-array are completed */
5247     i = nrecvs;
5248     while (i--) {
5249       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5250     }
5251     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5252   } else if (scall == MAT_REUSE_MATRIX) {
5253     sstartsj = *startsj_s;
5254     rstartsj = *startsj_r;
5255     bufa     = *bufa_ptr;
5256     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5257     b_otha   = b_oth->a;
5258   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");
5259 
5260   /* a-array */
5261   /*---------*/
5262   /*  post receives of a-array */
5263   for (i=0; i<nrecvs; i++) {
5264     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5265     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5266   }
5267 
5268   /* pack the outgoing message a-array */
5269   k = 0;
5270   for (i=0; i<nsends; i++) {
5271     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5272     bufA  = bufa+sstartsj[i];
5273     for (j=0; j<nrows; j++) {
5274       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5275       for (ll=0; ll<sbs; ll++) {
5276         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5277         for (l=0; l<ncols; l++) {
5278           *bufA++ = vals[l];
5279         }
5280         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5281       }
5282     }
5283     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5284   }
5285   /* recvs and sends of a-array are completed */
5286   i = nrecvs;
5287   while (i--) {
5288     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5289   }
5290   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5291   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5292 
5293   if (scall == MAT_INITIAL_MATRIX) {
5294     /* put together the new matrix */
5295     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5296 
5297     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5298     /* Since these are PETSc arrays, change flags to free them as necessary. */
5299     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5300     b_oth->free_a  = PETSC_TRUE;
5301     b_oth->free_ij = PETSC_TRUE;
5302     b_oth->nonew   = 0;
5303 
5304     ierr = PetscFree(bufj);CHKERRQ(ierr);
5305     if (!startsj_s || !bufa_ptr) {
5306       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5307       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller, so free it here */
5308     } else {
5309       *startsj_s = sstartsj;
5310       *startsj_r = rstartsj;
5311       *bufa_ptr  = bufa;
5312     }
5313   }
5314   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5315   PetscFunctionReturn(0);
5316 }
5317 
5318 /*@C
5319   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5320 
5321   Not Collective
5322 
5323   Input Parameter:
5324 . A - The matrix in mpiaij format
5325 
5326   Output Parameters:
5327 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5328 . colmap - A map from global column index to local index into lvec
5329 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5330 
5331   Level: developer
5332 
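  Example usage (a minimal sketch; assumes A has been created with type MATMPIAIJ):
.vb
  Vec        lvec;
  VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif

  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
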
5333 @*/
5334 #if defined(PETSC_USE_CTABLE)
5335 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5336 #else
5337 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5338 #endif
5339 {
5340   Mat_MPIAIJ *a;
5341 
5342   PetscFunctionBegin;
5343   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5344   PetscValidPointer(lvec, 2);
5345   PetscValidPointer(colmap, 3);
5346   PetscValidPointer(multScatter, 4);
5347   a = (Mat_MPIAIJ*) A->data;
5348   if (lvec) *lvec = a->lvec;
5349   if (colmap) *colmap = a->colmap;
5350   if (multScatter) *multScatter = a->Mvctx;
5351   PetscFunctionReturn(0);
5352 }
5353 
5354 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5355 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5356 #if defined(PETSC_HAVE_MKL_SPARSE)
5357 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5358 #endif
5359 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5360 #if defined(PETSC_HAVE_ELEMENTAL)
5361 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5362 #endif
5363 #if defined(PETSC_HAVE_HYPRE)
5364 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5365 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5366 #endif
5367 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5368 
5369 /*
5370     Computes (B'*A')' since computing A*B directly is untenable
5371 
5372                n                       p                          p
5373         (              )       (              )         (                  )
5374       m (      A       )  *  n (       B      )   =   m (         C        )
5375         (              )       (              )         (                  )
5376 
5377 */
5378 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5379 {
5380   PetscErrorCode ierr;
5381   Mat            At,Bt,Ct;
5382 
5383   PetscFunctionBegin;
5384   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5385   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5386   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5387   ierr = MatDestroy(&At);CHKERRQ(ierr);
5388   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5389   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5390   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5391   PetscFunctionReturn(0);
5392 }
5393 
5394 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5395 {
5396   PetscErrorCode ierr;
5397   PetscInt       m=A->rmap->n,n=B->cmap->n;
5398   Mat            Cmat;
5399 
5400   PetscFunctionBegin;
5401   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5402   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5403   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5404   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5405   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5406   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5407   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5408   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5409 
5410   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5411 
5412   *C = Cmat;
5413   PetscFunctionReturn(0);
5414 }
5415 
5416 /* ----------------------------------------------------------------*/
5417 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5418 {
5419   PetscErrorCode ierr;
5420 
5421   PetscFunctionBegin;
5422   if (scall == MAT_INITIAL_MATRIX) {
5423     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5424     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5425     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5426   }
5427   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5428   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5429   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5430   PetscFunctionReturn(0);
5431 }
5432 
5433 /*MC
5434    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5435 
5436    Options Database Keys:
5437 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5438 
5439   Level: beginner
5440 
5441 .seealso: MatCreateAIJ()
5442 M*/
5443 
5444 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5445 {
5446   Mat_MPIAIJ     *b;
5447   PetscErrorCode ierr;
5448   PetscMPIInt    size;
5449 
5450   PetscFunctionBegin;
5451   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5452 
5453   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5454   B->data       = (void*)b;
5455   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5456   B->assembled  = PETSC_FALSE;
5457   B->insertmode = NOT_SET_VALUES;
5458   b->size       = size;
5459 
5460   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5461 
5462   /* build cache for off array entries formed */
5463   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5464 
5465   b->donotstash  = PETSC_FALSE;
5466   b->colmap      = 0;
5467   b->garray      = 0;
5468   b->roworiented = PETSC_TRUE;
5469 
5470   /* stuff used for matrix vector multiply */
5471   b->lvec  = NULL;
5472   b->Mvctx = NULL;
5473 
5474   /* stuff for MatGetRow() */
5475   b->rowindices   = 0;
5476   b->rowvalues    = 0;
5477   b->getrowactive = PETSC_FALSE;
5478 
5479   /* flexible pointer used in CUSP/CUSPARSE classes */
5480   b->spptr = NULL;
5481 
5482   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5483   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5484   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5485   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5486   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5487   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5488   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5489   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5490 #if defined(PETSC_HAVE_MKL_SPARSE)
5491   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5492 #endif
5493   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5494   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5495 #if defined(PETSC_HAVE_ELEMENTAL)
5496   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5497 #endif
5498 #if defined(PETSC_HAVE_HYPRE)
5499   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5500 #endif
5501   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5502   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5503   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5504   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5505 #if defined(PETSC_HAVE_HYPRE)
5506   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5507 #endif
5508   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5509   PetscFunctionReturn(0);
5510 }
5511 
5512 /*@C
5513      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5514          and "off-diagonal" part of the matrix in CSR format.
5515 
5516    Collective on MPI_Comm
5517 
5518    Input Parameters:
5519 +  comm - MPI communicator
5520 .  m - number of local rows (Cannot be PETSC_DECIDE)
5521 .  n - This value should be the same as the local size used in creating the
5522        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5523        calculated if N is given) For square matrices n is almost always m.
5524 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5525 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5526 .   i - row indices for "diagonal" portion of matrix
5527 .   j - column indices for "diagonal" portion of matrix
5528 .   a - matrix values for "diagonal" portion of matrix
5529 .   oi - row indices for "off-diagonal" portion of matrix
5530 .   oj - column indices for "off-diagonal" portion of matrix
5531 -   oa - matrix values for "off-diagonal" portion of matrix
5532 
5533    Output Parameter:
5534 .   mat - the matrix
5535 
5536    Level: advanced
5537 
5538    Notes:
5539        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5540        must free the arrays once the matrix has been destroyed and not before.
5541 
5542        The i and j indices are 0 based
5543 
5544        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5545 
5546        This sets local rows and cannot be used to set off-processor values.
5547 
5548        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5549        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5550        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5551        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5552        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5553        communication if it is known that only local entries will be set.
5554 
5555 .keywords: matrix, aij, compressed row, sparse, parallel
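       Example usage (a minimal sketch, not a tested PETSc example): for the 2 by 2 matrix with rows {1,2} and {3,4}
       distributed over two processes with one row each, and reading off from how the two blocks are built below
       (the j indices are local to the "diagonal" block while the oj indices are global), rank 0 could pass

.vb
       PetscInt    i[]  = {0,1},j[]  = {0},oi[] = {0,1},oj[] = {1};
       PetscScalar a[]  = {1.0},oa[] = {2.0};
       Mat         A;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
       with the analogous arrays (j[] = {0}, a[] = {4.0}, oj[] = {0}, oa[] = {3.0}) on rank 1; the arrays must
       remain valid until the matrix is destroyed.
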
5556 
5557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5558           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5559 @*/
5560 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5561 {
5562   PetscErrorCode ierr;
5563   Mat_MPIAIJ     *maij;
5564 
5565   PetscFunctionBegin;
5566   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5567   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5568   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5569   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5570   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5571   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5572   maij = (Mat_MPIAIJ*) (*mat)->data;
5573 
5574   (*mat)->preallocated = PETSC_TRUE;
5575 
5576   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5577   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5578 
5579   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5580   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5581 
5582   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5583   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5584   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5585   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5586 
5587   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5588   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5589   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5590   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5591   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5592   PetscFunctionReturn(0);
5593 }
5594 
5595 /*
5596     Special version for direct calls from Fortran
5597 */
5598 #include <petsc/private/fortranimpl.h>
5599 
5600 /* Change these macros so can be used in void function */
5601 #undef CHKERRQ
5602 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5603 #undef SETERRQ2
5604 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5605 #undef SETERRQ3
5606 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5607 #undef SETERRQ
5608 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5609 
5610 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5611 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5612 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5613 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5614 #else
5615 #endif
5616 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5617 {
5618   Mat            mat  = *mmat;
5619   PetscInt       m    = *mm, n = *mn;
5620   InsertMode     addv = *maddv;
5621   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5622   PetscScalar    value;
5623   PetscErrorCode ierr;
5624 
5625   MatCheckPreallocated(mat,1);
5626   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5627 
5628 #if defined(PETSC_USE_DEBUG)
5629   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5630 #endif
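  /* The block below mirrors MatSetValues_MPIAIJ(): entries whose rows are owned locally are inserted directly
     into the diagonal (A) or off-diagonal (B) block, while entries for off-process rows are stashed and
     communicated at assembly time (unless donotstash is set) */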
5631   {
5632     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5633     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5634     PetscBool roworiented = aij->roworiented;
5635 
5636     /* Some Variables required in the macro */
5637     Mat        A                 = aij->A;
5638     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5639     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5640     MatScalar  *aa               = a->a;
5641     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5642     Mat        B                 = aij->B;
5643     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5644     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5645     MatScalar  *ba               = b->a;
5646 
5647     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5648     PetscInt  nonew = a->nonew;
5649     MatScalar *ap1,*ap2;
5650 
5651     PetscFunctionBegin;
5652     for (i=0; i<m; i++) {
5653       if (im[i] < 0) continue;
5654 #if defined(PETSC_USE_DEBUG)
5655       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5656 #endif
5657       if (im[i] >= rstart && im[i] < rend) {
5658         row      = im[i] - rstart;
5659         lastcol1 = -1;
5660         rp1      = aj + ai[row];
5661         ap1      = aa + ai[row];
5662         rmax1    = aimax[row];
5663         nrow1    = ailen[row];
5664         low1     = 0;
5665         high1    = nrow1;
5666         lastcol2 = -1;
5667         rp2      = bj + bi[row];
5668         ap2      = ba + bi[row];
5669         rmax2    = bimax[row];
5670         nrow2    = bilen[row];
5671         low2     = 0;
5672         high2    = nrow2;
5673 
5674         for (j=0; j<n; j++) {
5675           if (roworiented) value = v[i*n+j];
5676           else value = v[i+j*m];
5677           if (in[j] >= cstart && in[j] < cend) {
5678             col = in[j] - cstart;
5679             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5680             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5681           } else if (in[j] < 0) continue;
5682 #if defined(PETSC_USE_DEBUG)
5683           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5684 #endif
5685           else {
5686             if (mat->was_assembled) {
5687               if (!aij->colmap) {
5688                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5689               }
5690 #if defined(PETSC_USE_CTABLE)
5691               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5692               col--;
5693 #else
5694               col = aij->colmap[in[j]] - 1;
5695 #endif
5696               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5697               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5698                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5699                 col  =  in[j];
5700                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5701                 B     = aij->B;
5702                 b     = (Mat_SeqAIJ*)B->data;
5703                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5704                 rp2   = bj + bi[row];
5705                 ap2   = ba + bi[row];
5706                 rmax2 = bimax[row];
5707                 nrow2 = bilen[row];
5708                 low2  = 0;
5709                 high2 = nrow2;
5710                 bm    = aij->B->rmap->n;
5711                 ba    = b->a;
5712               }
5713             } else col = in[j];
5714             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5715           }
5716         }
5717       } else if (!aij->donotstash) {
5718         if (roworiented) {
5719           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5720         } else {
5721           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5722         }
5723       }
5724     }
5725   }
5726   PetscFunctionReturnVoid();
5727 }
5728 
5729