xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0a351717185171305da1018df269886a27fd85d5)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. This type also automatically switches over to using inodes when
22    enough exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN

    Input Parameters:
+   comm  - communicator on which the distributed matrix is created; rank 0 of comm is the sender
.   gmat  - the matrix to distribute; its type is checked (must be MATSEQAIJ) and its data is read
            only on rank 0, although MatGetBlockSizes() is called on it on every rank when
            reuse is MAT_INITIAL_MATRIX
.   m     - number of local rows (and local columns) this process receives
-   reuse - MAT_INITIAL_MATRIX creates *inmat; any other value takes the branch that assumes *inmat
            already holds the nonzero structure and only moves numerical values from rank 0

    Output Parameter:
.   inmat - the distributed matrix (created here for MAT_INITIAL_MATRIX, otherwise updated in place)

    Notes:
    With MAT_INITIAL_MATRIX the per-row array ld (number of entries left of the local column range)
    is stored in the Mat_MPIAIJ data; the reuse branch reads it back to split each incoming row
    into its off-diagonal and diagonal parts.
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    /* every rank adopts the block sizes found on rank 0 */
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
    /* turn the gathered per-process row counts into a prefix sum: process p owns rows [rowners[p],rowners[p+1]) */
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
          /* ld[i]: entries of row i with column below rstart; olens[i]: all entries outside [rstart,rend) */
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
      /* rank 0 inserts its own rows straight from gmat's arrays (nothing is copied or freed for them) */
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
      /* the receives below must match the order of rank 0's sends: row lengths, nz, column indices, values */
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
    /* dlens holds full row lengths here; subtract the off-diagonal part to get diagonal-block counts */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
    /* restore the full row lengths; they are the per-row entry counts passed to MatSetValues() below */
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
    /* ld is kept on the matrix (not freed here); the reuse branch below reads it back */
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced during the copy below, so the allocation start is remembered for PetscFree() */
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* NOTE(review): the splitting below presumably relies on each incoming row being ordered as
       [columns left of the local range | local columns | columns right of the local range] - confirm.
       Row 0 contributes its ld[0] leading values to ao and then its diagonal values to ad; every later
       copy into ao takes the right-hand tail of row i-1 together with the ld[i] leading values of row i. */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
    /* step back to the last row so that its remaining off-diagonal values can be copied */
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array) but is fast to access.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into row `row`, column `col`
   of the diagonal block, working directly on that block's arrays.

   The macro has no local state of its own: it reads and updates variables that the
   expanding function must have declared and initialized for the current row
   (rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, ii, N, nonew, ignorezeroentries,
   A, a, am, aa, ai, aj, aimax, ailen).

   Steps:
     - the search window [low1,high1) is reset on one side depending on whether col is
       at most the previously used column (lastcol1), narrowed by bisection until it
       holds at most 5 entries, then scanned linearly;
     - if col is found the value is added (ADD_VALUES) or overwritten;
     - otherwise the insertion is skipped for an off-diagonal zero when ignorezeroentries
       is set, skipped when nonew == 1, and reported as an error (using orow/ocol in
       the message) when nonew == -1;
     - otherwise MatSeqXAIJReallocateAIJ() is invoked (presumably to make room - see its
       definition), later entries of the row are shifted up by one, the new entry is
       stored and A->nonzerostate is incremented.
   ailen[row] is updated on every path that reaches the end of the macro.

   Because the macro defines the label a_noinsert it can be expanded only once per function.
*/
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into row `row`, column `col`
   of the off-diagonal block, working directly on that block's arrays.

   Like its diagonal-block counterpart, the macro operates on variables declared by the
   expanding function and set up for the current row (rp2, ap2, nrow2, rmax2, low2, high2,
   lastcol2, t, _i, ii, N, nonew, ignorezeroentries, B, b, bm, ba, bi, bj, bimax, bilen).

   The search window [low2,high2) is narrowed by bisection to at most 5 entries and then
   scanned; an existing entry is added to or overwritten.  A missing entry is skipped when
   the value is zero and ignorezeroentries is set (here without any exception for
   row == col), skipped when nonew == 1, and is an error when nonew == -1; otherwise
   MatSeqXAIJReallocateAIJ() is invoked, later entries are shifted up, the entry is stored
   and B->nonzerostate is incremented.  bilen[row] is updated at the end.

   Because the macro defines the label b_noinsert it can be expanded only once per function.
*/
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
/*
   MatSetValues_MPIAIJ - Inserts or adds a logically dense m-by-n block of values.

   Input Parameters:
+  mat  - the matrix
.  m,im - number of rows and their global indices; negative indices are skipped
.  n,in - number of columns and their global indices; negative indices are skipped
.  v    - the values, read as v[i*n+j] when the matrix is row oriented and as v[i+j*m] otherwise
-  addv - ADD_VALUES adds to existing entries, anything else overwrites them

   Rows owned by this process are handled entry by entry: columns inside the local column
   range go to the diagonal block through MatSetValues_SeqAIJ_A_Private(), all other columns
   go to the off-diagonal block through MatSetValues_SeqAIJ_B_Private().  Rows owned by
   another process are placed in the stash (unless donotstash is set), or raise an error when
   nooffprocentries is set.

   The locals declared under "Some Variables required in the macro" are the names the two
   macros expect to find in scope.
*/
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the per-row search state used by the A (suffix 1) and B (suffix 2) macros */
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block */
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
          /* column belongs to the off-diagonal block */
582           if (mat->was_assembled) {
            /* translate the global column through colmap, which stores (local column + 1); col becomes -1 when the column is absent */
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* column not yet present and new nonzeros are allowed: disassemble, after which the global column index is used */
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
      /* row owned by another process: stash the whole row of values */
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
/* Not defined in this part of the file; MatAssemblyEnd_MPIAIJ() installs it as the multdiagonalblock operation when the diagonal block has a nonzero inode.size */
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added in yy until the next line, */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1042 {
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054 
1055   PetscFunctionBegin;
1056   /* do nondiagonal part */
1057   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1058   /* send it on its way */
1059   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   /* do local part */
1061   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   /* receive remote parts */
1063   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1095 {
1096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1105   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_CTABLE)
1108   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1109 #else
1110   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1114   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1115   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1116   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1117   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1118   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1119 
1120   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1129 #if defined(PETSC_HAVE_ELEMENTAL)
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1131 #endif
1132 #if defined(PETSC_HAVE_HYPRE)
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1135 #endif
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1143   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1144   PetscErrorCode ierr;
1145   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1146   int            fd;
1147   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1148   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1149   PetscScalar    *column_values;
1150   PetscInt       message_count,flowcontrolcount;
1151   FILE           *file;
1152 
1153   PetscFunctionBegin;
1154   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1155   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1156   nz   = A->nz + B->nz;
1157   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1158   if (!rank) {
1159     header[0] = MAT_FILE_CLASSID;
1160     header[1] = mat->rmap->N;
1161     header[2] = mat->cmap->N;
1162 
1163     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1164     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1165     /* get largest number of rows any processor has */
1166     rlen  = mat->rmap->n;
1167     range = mat->rmap->range;
1168     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1169   } else {
1170     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171     rlen = mat->rmap->n;
1172   }
1173 
1174   /* load up the local row counts */
1175   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1176   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1177 
1178   /* store the row lengths to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1184       rlen = range[i+1] - range[i];
1185       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1191     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1192     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1195 
1196   /* load up the local column indices */
1197   nzmax = nz; /* th processor needs space a largest processor needs */
1198   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1199   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1200   cnt   = 0;
1201   for (i=0; i<mat->rmap->n; i++) {
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ((col = garray[B->j[j]]) > cstart) break;
1204       column_indices[cnt++] = col;
1205     }
1206     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1207     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1208   }
1209   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1210 
1211   /* store the column indices to the file */
1212   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1213   if (!rank) {
1214     MPI_Status status;
1215     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1216     for (i=1; i<size; i++) {
1217       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1218       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1219       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1220       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the local column values */
1233   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<mat->rmap->n; i++) {
1236     for (j=B->i[i]; j<B->i[i+1]; j++) {
1237       if (garray[B->j[j]] > cstart) break;
1238       column_values[cnt++] = B->a[j];
1239     }
1240     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1241     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1242   }
1243   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1244 
1245   /* store the column values to the file */
1246   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1247   if (!rank) {
1248     MPI_Status status;
1249     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1250     for (i=1; i<size; i++) {
1251       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1252       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1253       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1254       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1255       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1256     }
1257     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1258   } else {
1259     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1260     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1263   }
1264   ierr = PetscFree(column_values);CHKERRQ(ierr);
1265 
1266   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1267   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #include <petscdraw.h>
1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1273 {
1274   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1275   PetscErrorCode    ierr;
1276   PetscMPIInt       rank = aij->rank,size = aij->size;
1277   PetscBool         isdraw,iascii,isbinary;
1278   PetscViewer       sviewer;
1279   PetscViewerFormat format;
1280 
1281   PetscFunctionBegin;
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1283   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1284   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1285   if (iascii) {
1286     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1287     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1288       MatInfo   info;
1289       PetscBool inodes;
1290 
1291       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1292       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1295       if (!inodes) {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1298       } else {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       }
1302       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1306       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1309       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1312       PetscInt inodecount,inodelimit,*inodes;
1313       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1314       if (inodes) {
1315         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1316       } else {
1317         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1318       }
1319       PetscFunctionReturn(0);
1320     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1321       PetscFunctionReturn(0);
1322     }
1323   } else if (isbinary) {
1324     if (size == 1) {
1325       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1326       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1327     } else {
1328       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1329     }
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1336     if (isnull) PetscFunctionReturn(0);
1337   }
1338 
1339   {
1340     /* assemble the entire matrix onto first processor. */
1341     Mat        A;
1342     Mat_SeqAIJ *Aloc;
1343     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1344     MatScalar  *a;
1345 
1346     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1347     if (!rank) {
1348       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1351     }
1352     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1353     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1354     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1355     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1356     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1357 
1358     /* copy over the A part */
1359     Aloc = (Mat_SeqAIJ*)aij->A->data;
1360     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1361     row  = mat->rmap->rstart;
1362     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1363     for (i=0; i<m; i++) {
1364       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1365       row++;
1366       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1367     }
1368     aj = Aloc->j;
1369     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1370 
1371     /* copy over the B part */
1372     Aloc = (Mat_SeqAIJ*)aij->B->data;
1373     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1374     row  = mat->rmap->rstart;
1375     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1376     ct   = cols;
1377     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1378     for (i=0; i<m; i++) {
1379       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1380       row++;
1381       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1382     }
1383     ierr = PetscFree(ct);CHKERRQ(ierr);
1384     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1385     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1386     /*
1387        Everyone has to call to draw the matrix since the graphics waits are
1388        synchronized across all processors that share the PetscDraw object
1389     */
1390     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1391     if (!rank) {
1392       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1393       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1394     }
1395     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1396     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1397     ierr = MatDestroy(&A);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1403 {
1404   PetscErrorCode ierr;
1405   PetscBool      iascii,isdraw,issocket,isbinary;
1406 
1407   PetscFunctionBegin;
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1410   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1412   if (iascii || isdraw || isbinary || issocket) {
1413     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1414   }
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1419 {
1420   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1421   PetscErrorCode ierr;
1422   Vec            bb1 = 0;
1423   PetscBool      hasop;
1424 
1425   PetscFunctionBegin;
1426   if (flag == SOR_APPLY_UPPER) {
1427     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1428     PetscFunctionReturn(0);
1429   }
1430 
1431   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1432     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1433   }
1434 
1435   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1436     if (flag & SOR_ZERO_INITIAL_GUESS) {
1437       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1438       its--;
1439     }
1440 
1441     while (its--) {
1442       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1444 
1445       /* update rhs: bb1 = bb - B*x */
1446       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1447       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1448 
1449       /* local sweep */
1450       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1451     }
1452   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1453     if (flag & SOR_ZERO_INITIAL_GUESS) {
1454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455       its--;
1456     }
1457     while (its--) {
1458       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460 
1461       /* update rhs: bb1 = bb - B*x */
1462       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1463       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1464 
1465       /* local sweep */
1466       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1467     }
1468   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1469     if (flag & SOR_ZERO_INITIAL_GUESS) {
1470       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471       its--;
1472     }
1473     while (its--) {
1474       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1475       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476 
1477       /* update rhs: bb1 = bb - B*x */
1478       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1479       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1480 
1481       /* local sweep */
1482       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1483     }
1484   } else if (flag & SOR_EISENSTAT) {
1485     Vec xx1;
1486 
1487     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1488     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1489 
1490     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492     if (!mat->diag) {
1493       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1494       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1495     }
1496     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1497     if (hasop) {
1498       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1499     } else {
1500       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1501     }
1502     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1503 
1504     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1505 
1506     /* local sweep */
1507     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1508     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1509     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1510   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1511 
1512   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1513 
1514   matin->factorerrortype = mat->A->factorerrortype;
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1519 {
1520   Mat            aA,aB,Aperm;
1521   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1522   PetscScalar    *aa,*ba;
1523   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1524   PetscSF        rowsf,sf;
1525   IS             parcolp = NULL;
1526   PetscBool      done;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1531   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1532   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1533   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1534 
1535   /* Invert row permutation to find out where my rows should go */
1536   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1537   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1538   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1539   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1540   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1541   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1542 
1543   /* Invert column permutation to find out where my columns should go */
1544   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1545   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1546   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1547   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1548   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1549   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1550   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1551 
1552   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1553   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1554   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1555 
1556   /* Find out where my gcols should go */
1557   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1558   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1562   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1563   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1564   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1565 
1566   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1567   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1568   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1569   for (i=0; i<m; i++) {
1570     PetscInt row = rdest[i],rowner;
1571     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1572     for (j=ai[i]; j<ai[i+1]; j++) {
1573       PetscInt cowner,col = cdest[aj[j]];
1574       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1575       if (rowner == cowner) dnnz[i]++;
1576       else onnz[i]++;
1577     }
1578     for (j=bi[i]; j<bi[i+1]; j++) {
1579       PetscInt cowner,col = gcdest[bj[j]];
1580       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1581       if (rowner == cowner) dnnz[i]++;
1582       else onnz[i]++;
1583     }
1584   }
1585   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1587   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1588   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1589   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1590 
1591   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1592   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1593   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) {
1595     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1596     PetscInt j0,rowlen;
1597     rowlen = ai[i+1] - ai[i];
1598     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1599       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1600       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1601     }
1602     rowlen = bi[i+1] - bi[i];
1603     for (j0=j=0; j<rowlen; j0=j) {
1604       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1605       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1606     }
1607   }
1608   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1609   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1610   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1612   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1613   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1614   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1616   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1617   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1618   *B = Aperm;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1623 {
1624   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1625   PetscErrorCode ierr;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1629   if (ghosts) *ghosts = aij->garray;
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1634 {
1635   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1636   Mat            A    = mat->A,B = mat->B;
1637   PetscErrorCode ierr;
1638   PetscReal      isend[5],irecv[5];
1639 
1640   PetscFunctionBegin;
1641   info->block_size = 1.0;
1642   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1643 
1644   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1645   isend[3] = info->memory;  isend[4] = info->mallocs;
1646 
1647   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1648 
1649   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1650   isend[3] += info->memory;  isend[4] += info->mallocs;
1651   if (flag == MAT_LOCAL) {
1652     info->nz_used      = isend[0];
1653     info->nz_allocated = isend[1];
1654     info->nz_unneeded  = isend[2];
1655     info->memory       = isend[3];
1656     info->mallocs      = isend[4];
1657   } else if (flag == MAT_GLOBAL_MAX) {
1658     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1659 
1660     info->nz_used      = irecv[0];
1661     info->nz_allocated = irecv[1];
1662     info->nz_unneeded  = irecv[2];
1663     info->memory       = irecv[3];
1664     info->mallocs      = irecv[4];
1665   } else if (flag == MAT_GLOBAL_SUM) {
1666     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1667 
1668     info->nz_used      = irecv[0];
1669     info->nz_allocated = irecv[1];
1670     info->nz_unneeded  = irecv[2];
1671     info->memory       = irecv[3];
1672     info->mallocs      = irecv[4];
1673   }
1674   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1675   info->fill_ratio_needed = 0;
1676   info->factor_mallocs    = 0;
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1681 {
1682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   switch (op) {
1687   case MAT_NEW_NONZERO_LOCATIONS:
1688   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1689   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1690   case MAT_KEEP_NONZERO_PATTERN:
1691   case MAT_NEW_NONZERO_LOCATION_ERR:
1692   case MAT_USE_INODES:
1693   case MAT_IGNORE_ZERO_ENTRIES:
1694     MatCheckPreallocated(A,1);
1695     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1696     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1697     break;
1698   case MAT_ROW_ORIENTED:
1699     MatCheckPreallocated(A,1);
1700     a->roworiented = flg;
1701 
1702     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1703     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1704     break;
1705   case MAT_NEW_DIAGONALS:
1706     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   case MAT_SPD:
1712     A->spd_set = PETSC_TRUE;
1713     A->spd     = flg;
1714     if (flg) {
1715       A->symmetric                  = PETSC_TRUE;
1716       A->structurally_symmetric     = PETSC_TRUE;
1717       A->symmetric_set              = PETSC_TRUE;
1718       A->structurally_symmetric_set = PETSC_TRUE;
1719     }
1720     break;
1721   case MAT_SYMMETRIC:
1722     MatCheckPreallocated(A,1);
1723     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_STRUCTURALLY_SYMMETRIC:
1726     MatCheckPreallocated(A,1);
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_HERMITIAN:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_SYMMETRY_ETERNAL:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_SUBMAT_SINGLEIS:
1738     A->submat_singleis = flg;
1739     break;
1740   case MAT_STRUCTURE_ONLY:
1741     /* The option is handled directly by MatSetOption() */
1742     break;
1743   default:
1744     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1750 {
1751   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1752   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1753   PetscErrorCode ierr;
1754   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1755   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1756   PetscInt       *cmap,*idx_p;
1757 
1758   PetscFunctionBegin;
1759   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1760   mat->getrowactive = PETSC_TRUE;
1761 
1762   if (!mat->rowvalues && (idx || v)) {
1763     /*
1764         allocate enough space to hold information from the longest row.
1765     */
1766     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1767     PetscInt   max = 1,tmp;
1768     for (i=0; i<matin->rmap->n; i++) {
1769       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1770       if (max < tmp) max = tmp;
1771     }
1772     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1773   }
1774 
1775   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1776   lrow = row - rstart;
1777 
1778   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1779   if (!v)   {pvA = 0; pvB = 0;}
1780   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1781   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1782   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1783   nztot = nzA + nzB;
1784 
1785   cmap = mat->garray;
1786   if (v  || idx) {
1787     if (nztot) {
1788       /* Sort by increasing column numbers, assuming A and B already sorted */
1789       PetscInt imark = -1;
1790       if (v) {
1791         *v = v_p = mat->rowvalues;
1792         for (i=0; i<nzB; i++) {
1793           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1794           else break;
1795         }
1796         imark = i;
1797         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1798         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1799       }
1800       if (idx) {
1801         *idx = idx_p = mat->rowindices;
1802         if (imark > -1) {
1803           for (i=0; i<imark; i++) {
1804             idx_p[i] = cmap[cworkB[i]];
1805           }
1806         } else {
1807           for (i=0; i<nzB; i++) {
1808             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1809             else break;
1810           }
1811           imark = i;
1812         }
1813         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1814         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1815       }
1816     } else {
1817       if (idx) *idx = 0;
1818       if (v)   *v   = 0;
1819     }
1820   }
1821   *nz  = nztot;
1822   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1823   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1828 {
1829   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1830 
1831   PetscFunctionBegin;
1832   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1833   aij->getrowactive = PETSC_FALSE;
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1838 {
1839   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1840   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1841   PetscErrorCode ierr;
1842   PetscInt       i,j,cstart = mat->cmap->rstart;
1843   PetscReal      sum = 0.0;
1844   MatScalar      *v;
1845 
1846   PetscFunctionBegin;
1847   if (aij->size == 1) {
1848     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1849   } else {
1850     if (type == NORM_FROBENIUS) {
1851       v = amat->a;
1852       for (i=0; i<amat->nz; i++) {
1853         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1854       }
1855       v = bmat->a;
1856       for (i=0; i<bmat->nz; i++) {
1857         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1858       }
1859       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1860       *norm = PetscSqrtReal(*norm);
1861       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1862     } else if (type == NORM_1) { /* max column norm */
1863       PetscReal *tmp,*tmp2;
1864       PetscInt  *jj,*garray = aij->garray;
1865       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1866       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1867       *norm = 0.0;
1868       v     = amat->a; jj = amat->j;
1869       for (j=0; j<amat->nz; j++) {
1870         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1871       }
1872       v = bmat->a; jj = bmat->j;
1873       for (j=0; j<bmat->nz; j++) {
1874         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1875       }
1876       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1877       for (j=0; j<mat->cmap->N; j++) {
1878         if (tmp2[j] > *norm) *norm = tmp2[j];
1879       }
1880       ierr = PetscFree(tmp);CHKERRQ(ierr);
1881       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1882       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1883     } else if (type == NORM_INFINITY) { /* max row norm */
1884       PetscReal ntemp = 0.0;
1885       for (j=0; j<aij->A->rmap->n; j++) {
1886         v   = amat->a + amat->i[j];
1887         sum = 0.0;
1888         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1889           sum += PetscAbsScalar(*v); v++;
1890         }
1891         v = bmat->a + bmat->i[j];
1892         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         if (sum > ntemp) ntemp = sum;
1896       }
1897       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1899     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1905 {
1906   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1907   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1908   PetscErrorCode ierr;
1909   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1910   PetscInt       cstart = A->cmap->rstart,ncol;
1911   Mat            B;
1912   MatScalar      *array;
1913 
1914   PetscFunctionBegin;
1915   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1916 
1917   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1918   ai = Aloc->i; aj = Aloc->j;
1919   bi = Bloc->i; bj = Bloc->j;
1920   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1921     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1922     PetscSFNode          *oloc;
1923     PETSC_UNUSED PetscSF sf;
1924 
1925     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1926     /* compute d_nnz for preallocation */
1927     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1928     for (i=0; i<ai[ma]; i++) {
1929       d_nnz[aj[i]]++;
1930       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1931     }
1932     /* compute local off-diagonal contributions */
1933     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1934     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1935     /* map those to global */
1936     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1937     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1938     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1939     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1940     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1941     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1942     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1943 
1944     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1945     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1946     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1947     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1948     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1949     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1950   } else {
1951     B    = *matout;
1952     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1953     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954   }
1955 
1956   /* copy over the A part */
1957   array = Aloc->a;
1958   row   = A->rmap->rstart;
1959   for (i=0; i<ma; i++) {
1960     ncol = ai[i+1]-ai[i];
1961     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1962     row++;
1963     array += ncol; aj += ncol;
1964   }
1965   aj = Aloc->j;
1966   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1967 
1968   /* copy over the B part */
1969   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1970   array = Bloc->a;
1971   row   = A->rmap->rstart;
1972   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1973   cols_tmp = cols;
1974   for (i=0; i<mb; i++) {
1975     ncol = bi[i+1]-bi[i];
1976     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++;
1978     array += ncol; cols_tmp += ncol;
1979   }
1980   ierr = PetscFree(cols);CHKERRQ(ierr);
1981 
1982   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1985     *matout = B;
1986   } else {
1987     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1993 {
1994   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1995   Mat            a    = aij->A,b = aij->B;
1996   PetscErrorCode ierr;
1997   PetscInt       s1,s2,s3;
1998 
1999   PetscFunctionBegin;
2000   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2001   if (rr) {
2002     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2003     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2004     /* Overlap communication with computation. */
2005     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2006   }
2007   if (ll) {
2008     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2009     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2010     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2011   }
2012   /* scale  the diagonal block */
2013   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2014 
2015   if (rr) {
2016     /* Do a scatter end and then right scale the off-diagonal block */
2017     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2019   }
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2024 {
2025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2034 {
2035   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2036   Mat            a,b,c,d;
2037   PetscBool      flg;
2038   PetscErrorCode ierr;
2039 
2040   PetscFunctionBegin;
2041   a = matA->A; b = matA->B;
2042   c = matB->A; d = matB->B;
2043 
2044   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2045   if (flg) {
2046     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2047   }
2048   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2053 {
2054   PetscErrorCode ierr;
2055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2056   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2057 
2058   PetscFunctionBegin;
2059   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2060   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2061     /* because of the column compression in the off-processor part of the matrix a->B,
2062        the number of columns in a->B and b->B may be different, hence we cannot call
2063        the MatCopy() directly on the two parts. If need be, we can provide a more
2064        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2065        then copying the submatrices */
2066     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2067   } else {
2068     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2069     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2070   }
2071   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2076 {
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 /*
2085    Computes the number of nonzeros per row needed for preallocation when X and Y
2086    have different nonzero structure.
2087 */
2088 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2089 {
2090   PetscInt       i,j,k,nzx,nzy;
2091 
2092   PetscFunctionBegin;
2093   /* Set the number of nonzeros in the new matrix */
2094   for (i=0; i<m; i++) {
2095     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2096     nzx = xi[i+1] - xi[i];
2097     nzy = yi[i+1] - yi[i];
2098     nnz[i] = 0;
2099     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2100       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2101       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2102       nnz[i]++;
2103     }
2104     for (; k<nzy; k++) nnz[i]++;
2105   }
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2110 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2111 {
2112   PetscErrorCode ierr;
2113   PetscInt       m = Y->rmap->N;
2114   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2115   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2116 
2117   PetscFunctionBegin;
2118   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x    = (Mat_SeqAIJ*)xx->A->data;
2133     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2134     y    = (Mat_SeqAIJ*)yy->A->data;
2135     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2136     x    = (Mat_SeqAIJ*)xx->B->data;
2137     y    = (Mat_SeqAIJ*)yy->B->data;
2138     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2141   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2142     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2143   } else {
2144     Mat      B;
2145     PetscInt *nnz_d,*nnz_o;
2146     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2147     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2148     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2149     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2150     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2151     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2152     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2153     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2154     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2155     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2156     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2157     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2158     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2159     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2160   }
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2165 
2166 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2167 {
2168 #if defined(PETSC_USE_COMPLEX)
2169   PetscErrorCode ierr;
2170   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2174   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2175 #else
2176   PetscFunctionBegin;
2177 #endif
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2182 {
2183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2188   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2193 {
2194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2199   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207   PetscInt       i,*idxb = 0;
2208   PetscScalar    *va,*vb;
2209   Vec            vtmp;
2210 
2211   PetscFunctionBegin;
2212   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2213   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2214   if (idx) {
2215     for (i=0; i<A->rmap->n; i++) {
2216       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2217     }
2218   }
2219 
2220   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2221   if (idx) {
2222     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2223   }
2224   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2225   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2226 
2227   for (i=0; i<A->rmap->n; i++) {
2228     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2229       va[i] = vb[i];
2230       if (idx) idx[i] = a->garray[idxb[i]];
2231     }
2232   }
2233 
2234   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2235   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2236   ierr = PetscFree(idxb);CHKERRQ(ierr);
2237   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2242 {
2243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2244   PetscErrorCode ierr;
2245   PetscInt       i,*idxb = 0;
2246   PetscScalar    *va,*vb;
2247   Vec            vtmp;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2251   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2252   if (idx) {
2253     for (i=0; i<A->cmap->n; i++) {
2254       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2255     }
2256   }
2257 
2258   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2259   if (idx) {
2260     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2261   }
2262   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2263   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2264 
2265   for (i=0; i<A->rmap->n; i++) {
2266     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2267       va[i] = vb[i];
2268       if (idx) idx[i] = a->garray[idxb[i]];
2269     }
2270   }
2271 
2272   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2273   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2274   ierr = PetscFree(idxb);CHKERRQ(ierr);
2275   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2280 {
2281   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2282   PetscInt       n      = A->rmap->n;
2283   PetscInt       cstart = A->cmap->rstart;
2284   PetscInt       *cmap  = mat->garray;
2285   PetscInt       *diagIdx, *offdiagIdx;
2286   Vec            diagV, offdiagV;
2287   PetscScalar    *a, *diagA, *offdiagA;
2288   PetscInt       r;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2293   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2294   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2295   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2296   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2297   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2298   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2299   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2300   for (r = 0; r < n; ++r) {
2301     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2302       a[r]   = diagA[r];
2303       idx[r] = cstart + diagIdx[r];
2304     } else {
2305       a[r]   = offdiagA[r];
2306       idx[r] = cmap[offdiagIdx[r]];
2307     }
2308   }
2309   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2310   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2311   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2312   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2313   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2314   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2319 {
2320   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2321   PetscInt       n      = A->rmap->n;
2322   PetscInt       cstart = A->cmap->rstart;
2323   PetscInt       *cmap  = mat->garray;
2324   PetscInt       *diagIdx, *offdiagIdx;
2325   Vec            diagV, offdiagV;
2326   PetscScalar    *a, *diagA, *offdiagA;
2327   PetscInt       r;
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2332   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2333   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2334   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2335   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2337   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2338   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2339   for (r = 0; r < n; ++r) {
2340     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2341       a[r]   = diagA[r];
2342       idx[r] = cstart + diagIdx[r];
2343     } else {
2344       a[r]   = offdiagA[r];
2345       idx[r] = cmap[offdiagIdx[r]];
2346     }
2347   }
2348   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2349   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2350   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2351   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2352   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2353   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2358 {
2359   PetscErrorCode ierr;
2360   Mat            *dummy;
2361 
2362   PetscFunctionBegin;
2363   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2364   *newmat = *dummy;
2365   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2370 {
2371   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2376   A->factorerrortype = a->A->factorerrortype;
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2381 {
2382   PetscErrorCode ierr;
2383   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2387   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2394 {
2395   PetscFunctionBegin;
2396   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2397   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 /*@
2402    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2403 
2404    Collective on Mat
2405 
2406    Input Parameters:
2407 +    A - the matrix
2408 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2409 
2410  Level: advanced
2411 
2412 @*/
2413 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2414 {
2415   PetscErrorCode       ierr;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2423 {
2424   PetscErrorCode       ierr;
2425   PetscBool            sc = PETSC_FALSE,flg;
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2429   ierr = PetscObjectOptionsBegin((PetscObject)A);
2430     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2431     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2432     if (flg) {
2433       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2434     }
2435   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2440 {
2441   PetscErrorCode ierr;
2442   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2443   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2444 
2445   PetscFunctionBegin;
2446   if (!Y->preallocated) {
2447     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2448   } else if (!aij->nz) {
2449     PetscInt nonew = aij->nonew;
2450     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2451     aij->nonew = nonew;
2452   }
2453   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2458 {
2459   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2464   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2465   if (d) {
2466     PetscInt rstart;
2467     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2468     *d += rstart;
2469 
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 
2475 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - table of the MPIAIJ implementations of the matrix operations.

   The initializer is positional: every entry must stay in the slot that corresponds to the
   matching member of struct _MatOps (declared outside this section of the file), so entries
   may only be replaced in place, never inserted or removed. The bracketed numbers are running
   zero-based slot indices that help keep the count right.

   A 0 entry leaves that slot as a null function pointer -- presumably meaning that this table
   provides no implementation for the operation (TODO confirm how callers treat null slots).
*/
2476 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2477                                        MatGetRow_MPIAIJ,
2478                                        MatRestoreRow_MPIAIJ,
2479                                        MatMult_MPIAIJ,
2480                                 /* 4*/ MatMultAdd_MPIAIJ,
2481                                        MatMultTranspose_MPIAIJ,
2482                                        MatMultTransposeAdd_MPIAIJ,
2483                                        0,
2484                                        0,
2485                                        0,
2486                                 /*10*/ 0,
2487                                        0,
2488                                        0,
2489                                        MatSOR_MPIAIJ,
2490                                        MatTranspose_MPIAIJ,
2491                                 /*15*/ MatGetInfo_MPIAIJ,
2492                                        MatEqual_MPIAIJ,
2493                                        MatGetDiagonal_MPIAIJ,
2494                                        MatDiagonalScale_MPIAIJ,
2495                                        MatNorm_MPIAIJ,
2496                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2497                                        MatAssemblyEnd_MPIAIJ,
2498                                        MatSetOption_MPIAIJ,
2499                                        MatZeroEntries_MPIAIJ,
2500                                 /*24*/ MatZeroRows_MPIAIJ,
2501                                        0,
2502                                        0,
2503                                        0,
2504                                        0,
2505                                 /*29*/ MatSetUp_MPIAIJ,
2506                                        0,
2507                                        0,
2508                                        MatGetDiagonalBlock_MPIAIJ,
2509                                        0,
2510                                 /*34*/ MatDuplicate_MPIAIJ,
2511                                        0,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                 /*39*/ MatAXPY_MPIAIJ,
2516                                        MatCreateSubMatrices_MPIAIJ,
2517                                        MatIncreaseOverlap_MPIAIJ,
2518                                        MatGetValues_MPIAIJ,
2519                                        MatCopy_MPIAIJ,
2520                                 /*44*/ MatGetRowMax_MPIAIJ,
2521                                        MatScale_MPIAIJ,
2522                                        MatShift_MPIAIJ,
2523                                        MatDiagonalSet_MPIAIJ,
2524                                        MatZeroRowsColumns_MPIAIJ,
2525                                 /*49*/ MatSetRandom_MPIAIJ,
2526                                        0,
2527                                        0,
2528                                        0,
2529                                        0,
2530                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2531                                        0,
2532                                        MatSetUnfactored_MPIAIJ,
2533                                        MatPermute_MPIAIJ,
2534                                        0,
2535                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2536                                        MatDestroy_MPIAIJ,
2537                                        MatView_MPIAIJ,
2538                                        0,
2539                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2540                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2541                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2546                                        MatGetRowMinAbs_MPIAIJ,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                 /*75*/ MatFDColoringApply_AIJ,
2552                                        MatSetFromOptions_MPIAIJ,
2553                                        0,
2554                                        0,
2555                                        MatFindZeroDiagonals_MPIAIJ,
2556                                 /*80*/ 0,
2557                                        0,
2558                                        0,
2559                                 /*83*/ MatLoad_MPIAIJ,
2560                                        MatIsSymmetric_MPIAIJ,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                        0,
2565                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2566                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2567                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2568                                        MatPtAP_MPIAIJ_MPIAIJ,
2569                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2570                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*99*/ 0,
2576                                        0,
2577                                        0,
2578                                        MatConjugate_MPIAIJ,
2579                                        0,
2580                                 /*104*/MatSetValuesRow_MPIAIJ,
2581                                        MatRealPart_MPIAIJ,
2582                                        MatImaginaryPart_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                 /*109*/0,
2586                                        0,
2587                                        MatGetRowMin_MPIAIJ,
2588                                        0,
2589                                        MatMissingDiagonal_MPIAIJ,
2590                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2591                                        0,
2592                                        MatGetGhosts_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                 /*119*/0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        MatGetMultiProcBlock_MPIAIJ,
2600                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2601                                        MatGetColumnNorms_MPIAIJ,
2602                                        MatInvertBlockDiagonal_MPIAIJ,
2603                                        0,
2604                                        MatCreateSubMatricesMPI_MPIAIJ,
2605                                 /*129*/0,
2606                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2607                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2608                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2609                                        0,
2610                                 /*134*/0,
2611                                        0,
2612                                        MatRARt_MPIAIJ_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*139*/MatSetBlockSizes_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        MatFDColoringSetUp_MPIXAIJ,
2619                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2620                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2621 };
2622 
2623 /* ----------------------------------------------------------------------------------------*/
2624 
2625 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2626 {
2627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2628   PetscErrorCode ierr;
2629 
2630   PetscFunctionBegin;
2631   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2632   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2637 {
2638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2639   PetscErrorCode ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2643   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2648 {
2649   Mat_MPIAIJ     *b;
2650   PetscErrorCode ierr;
2651 
2652   PetscFunctionBegin;
2653   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2654   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2655   b = (Mat_MPIAIJ*)B->data;
2656 
2657 #if defined(PETSC_USE_CTABLE)
2658   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2659 #else
2660   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2661 #endif
2662   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2663   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2664   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2665 
2666   /* Because the B will have been resized we simply destroy it and create a new one each time */
2667   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2668   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2669   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2670   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2671   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2672   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2673 
2674   if (!B->preallocated) {
2675     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2676     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2677     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2678     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2679     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2680   }
2681 
2682   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2683   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2684   B->preallocated  = PETSC_TRUE;
2685   B->was_assembled = PETSC_FALSE;
2686   B->assembled     = PETSC_FALSE;;
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2691 {
2692   Mat_MPIAIJ     *b;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2697   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2698   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2699   b = (Mat_MPIAIJ*)B->data;
2700 
2701 #if defined(PETSC_USE_CTABLE)
2702   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2703 #else
2704   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2705 #endif
2706   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2707   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2708   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2709 
2710   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2711   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2712   B->preallocated  = PETSC_TRUE;
2713   B->was_assembled = PETSC_FALSE;
2714   B->assembled = PETSC_FALSE;
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2719 {
2720   Mat            mat;
2721   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2722   PetscErrorCode ierr;
2723 
2724   PetscFunctionBegin;
2725   *newmat = 0;
2726   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2727   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2728   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2729   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2730   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2731   a       = (Mat_MPIAIJ*)mat->data;
2732 
2733   mat->factortype   = matin->factortype;
2734   mat->assembled    = PETSC_TRUE;
2735   mat->insertmode   = NOT_SET_VALUES;
2736   mat->preallocated = PETSC_TRUE;
2737 
2738   a->size         = oldmat->size;
2739   a->rank         = oldmat->rank;
2740   a->donotstash   = oldmat->donotstash;
2741   a->roworiented  = oldmat->roworiented;
2742   a->rowindices   = 0;
2743   a->rowvalues    = 0;
2744   a->getrowactive = PETSC_FALSE;
2745 
2746   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2747   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2748 
2749   if (oldmat->colmap) {
2750 #if defined(PETSC_USE_CTABLE)
2751     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2752 #else
2753     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2754     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2755     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2756 #endif
2757   } else a->colmap = 0;
2758   if (oldmat->garray) {
2759     PetscInt len;
2760     len  = oldmat->B->cmap->n;
2761     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2762     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2763     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2764   } else a->garray = 0;
2765 
2766   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2767   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2768   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2769   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2770   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2771   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2772   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2773   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2774   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2775   *newmat = mat;
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2780 {
2781   PetscScalar    *vals,*svals;
2782   MPI_Comm       comm;
2783   PetscErrorCode ierr;
2784   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2785   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2786   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2787   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2788   PetscInt       cend,cstart,n,*rowners;
2789   int            fd;
2790   PetscInt       bs = newMat->rmap->bs;
2791 
2792   PetscFunctionBegin;
2793   /* force binary viewer to load .info file if it has not yet done so */
2794   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2795   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2796   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2798   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2801     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2802     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2803   }
2804 
2805   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2806   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808   if (bs < 0) bs = 1;
2809 
2810   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   M    = header[1]; N = header[2];
2812 
2813   /* If global sizes are set, check if they are consistent with that given in the file */
2814   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2815   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2816 
2817   /* determine ownership of all (block) rows */
2818   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2819   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2820   else m = newMat->rmap->n; /* Set by user */
2821 
2822   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2823   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2824 
2825   /* First process needs enough room for process with most rows */
2826   if (!rank) {
2827     mmax = rowners[1];
2828     for (i=2; i<=size; i++) {
2829       mmax = PetscMax(mmax, rowners[i]);
2830     }
2831   } else mmax = -1;             /* unused, but compilers complain */
2832 
2833   rowners[0] = 0;
2834   for (i=2; i<=size; i++) {
2835     rowners[i] += rowners[i-1];
2836   }
2837   rstart = rowners[rank];
2838   rend   = rowners[rank+1];
2839 
2840   /* distribute row lengths to all processors */
2841   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2842   if (!rank) {
2843     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2844     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2845     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2846     for (j=0; j<m; j++) {
2847       procsnz[0] += ourlens[j];
2848     }
2849     for (i=1; i<size; i++) {
2850       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2851       /* calculate the number of nonzeros on each processor */
2852       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2853         procsnz[i] += rowlengths[j];
2854       }
2855       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2858   } else {
2859     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2860   }
2861 
2862   if (!rank) {
2863     /* determine max buffer needed and allocate it */
2864     maxnz = 0;
2865     for (i=0; i<size; i++) {
2866       maxnz = PetscMax(maxnz,procsnz[i]);
2867     }
2868     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2869 
2870     /* read in my part of the matrix column indices  */
2871     nz   = procsnz[0];
2872     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2873     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2874 
2875     /* read in every one elses and ship off */
2876     for (i=1; i<size; i++) {
2877       nz   = procsnz[i];
2878       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2879       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2880     }
2881     ierr = PetscFree(cols);CHKERRQ(ierr);
2882   } else {
2883     /* determine buffer space needed for message */
2884     nz = 0;
2885     for (i=0; i<m; i++) {
2886       nz += ourlens[i];
2887     }
2888     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2889 
2890     /* receive message of column indices*/
2891     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   /* determine column ownership if matrix is not square */
2895   if (N != M) {
2896     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2897     else n = newMat->cmap->n;
2898     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2899     cstart = cend - n;
2900   } else {
2901     cstart = rstart;
2902     cend   = rend;
2903     n      = cend - cstart;
2904   }
2905 
2906   /* loop over local rows, determining number of off diagonal entries */
2907   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2908   jj   = 0;
2909   for (i=0; i<m; i++) {
2910     for (j=0; j<ourlens[i]; j++) {
2911       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2912       jj++;
2913     }
2914   }
2915 
2916   for (i=0; i<m; i++) {
2917     ourlens[i] -= offlens[i];
2918   }
2919   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2920 
2921   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2922 
2923   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2924 
2925   for (i=0; i<m; i++) {
2926     ourlens[i] += offlens[i];
2927   }
2928 
2929   if (!rank) {
2930     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2931 
2932     /* read in my part of the matrix numerical values  */
2933     nz   = procsnz[0];
2934     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935 
2936     /* insert into matrix */
2937     jj      = rstart;
2938     smycols = mycols;
2939     svals   = vals;
2940     for (i=0; i<m; i++) {
2941       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2942       smycols += ourlens[i];
2943       svals   += ourlens[i];
2944       jj++;
2945     }
2946 
2947     /* read in other processors and ship out */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2954   } else {
2955     /* receive numeric values */
2956     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* receive message of values*/
2959     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971   }
2972   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2973   ierr = PetscFree(vals);CHKERRQ(ierr);
2974   ierr = PetscFree(mycols);CHKERRQ(ierr);
2975   ierr = PetscFree(rowners);CHKERRQ(ierr);
2976   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 /* Not scalable because of ISAllGather() unless getting all columns. */
2982 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2983 {
2984   PetscErrorCode ierr;
2985   IS             iscol_local;
2986   PetscBool      isstride;
2987   PetscMPIInt    lisstride=0,gisstride;
2988 
2989   PetscFunctionBegin;
2990   /* check if we are grabbing all columns*/
2991   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2992 
2993   if (isstride) {
2994     PetscInt  start,len,mstart,mlen;
2995     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2996     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2997     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2998     if (mstart == start && mlen-mstart == len) lisstride = 1;
2999   }
3000 
3001   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3002   if (gisstride) {
3003     PetscInt N;
3004     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3005     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3006     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3007     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3008   } else {
3009     PetscInt cbs;
3010     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3011     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3012     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3013   }
3014 
3015   *isseq = iscol_local;
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 /*
3020  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3021  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3022 
3023  Input Parameters:
3024    mat - matrix
3025    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3026            i.e., mat->rstart <= isrow[i] < mat->rend
3027    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3028            i.e., mat->cstart <= iscol[i] < mat->cend
3029  Output Parameter:
3030    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3031    iscol_o - sequential column index set for retrieving mat->B
3032    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3033  */
3034 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3035 {
3036   PetscErrorCode ierr;
3037   Vec            x,cmap;
3038   const PetscInt *is_idx;
3039   PetscScalar    *xarray,*cmaparray;
3040   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3041   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3042   Mat            B=a->B;
3043   Vec            lvec=a->lvec,lcmap;
3044   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3045   MPI_Comm       comm;
3046   PetscMPIInt    rank;
3047   VecScatter     Mvctx;
3048 
3049   PetscFunctionBegin;
3050   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3052   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3053 
3054   //ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3055   //ierr = ISView(iscol,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3056 
3057   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3058   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3059   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3060   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3061   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3062 
3063   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3064 
3065   /* Get start indices */
3066   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3067   isstart -= ncols;
3068   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3069 
3070   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3071   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3072   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3073   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3074   for (i=0; i<ncols; i++) {
3075     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3076     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3077     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3078   }
3079   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3080   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3081   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3082   //ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3083 
3084   /* Get iscol_d */
3085   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3086   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3087   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3088 
3089   /* Get isrow_d */
3090   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3091   rstart = mat->rmap->rstart;
3092   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3093   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3094   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3095   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3096 
3097   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3098   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3099   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3100 
3101   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3102 #if 0
3103   if (!a->Mvctx_mpi1) {
3104     /* a->Mvctx causes random 'count' in o-build? See src/mat/examples/tests/runex59_2 */
3105     a->Mvctx_mpi1_flg = PETSC_TRUE;
3106     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
3107   }
3108   Mvctx = a->Mvctx_mpi1;
3109 #endif
3110   Mvctx = a->Mvctx;
3111   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3112   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3113 
3114   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3115   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3116 
3117   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3118   /* off-process column indices */
3119   count = 0;
3120   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3121   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3122 
3123   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3124   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3125   for (i=0; i<Bn; i++) {
3126     if (PetscRealPart(xarray[i]) > -1.0) {
3127       idx[count]     = i;                   /* local column index in off-diagonal part B */
3128       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3129       count++;
3130     }
3131   }
3132   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3133   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3134   //printf("[%d] count %d, nlvec %d\n",rank,count,lvec->map->N);
3135   if (count != 6) {
3136     printf("[%d] lvec:\n",rank);
3137     ierr = VecView(lvec,0);CHKERRQ(ierr);
3138     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"count %d != 6",count);
3139   }
3140   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3141   /* cannot ensure iscol_o has same blocksize as iscol! */
3142 
3143   ierr = PetscFree(idx);CHKERRQ(ierr);
3144 
3145   *garray = cmap1;
3146 
3147   ierr = VecDestroy(&x);CHKERRQ(ierr);
3148   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3149   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3150   PetscFunctionReturn(0);
3151 }
3152 
3153 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3154 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3155 {
3156   PetscErrorCode ierr;
3157   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3158   Mat            M = NULL;
3159   MPI_Comm       comm;
3160   IS             iscol_d,isrow_d,iscol_o;
3161   Mat            Asub = NULL,Bsub = NULL;
3162   PetscInt       n;
3163 
3164   PetscFunctionBegin;
3165   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3166 
3167   if (call == MAT_REUSE_MATRIX) {
3168     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3169     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3170     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3171 
3172     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3173     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3174 
3175     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3176     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3177 
3178     /* Update diagonal and off-diagonal portions of submat */
3179     asub = (Mat_MPIAIJ*)(*submat)->data;
3180     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3181     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3182     if (n) {
3183       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3184     }
3185     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3186     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3187 
3188   } else { /* call == MAT_INITIAL_MATRIX) */
3189     const PetscInt *garray;
3190     PetscInt        BsubN;
3191 
3192     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3193     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3194 
3195     /* Create local submatrices Asub and Bsub */
3196     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3197     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3198 
3199     /* Create submatrix M */
3200     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3201 
3202     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3203     asub = (Mat_MPIAIJ*)M->data;
3204 
3205     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3206     n = asub->B->cmap->N;
3207     if (BsubN > n) {
3208       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3209       const PetscInt *idx;
3210       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3211       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3212 
3213       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3214       j = 0;
3215       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3216       for (i=0; i<n; i++) {
3217         if (j >= BsubN) break;
3218         while (subgarray[i] > garray[j]) j++;
3219 
3220         if (subgarray[i] == garray[j]) {
3221           idx_new[i] = idx[j++];
3222         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3223       }
3224       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3225 
3226       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3227       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3228 
3229     } else if (BsubN < n) {
3230       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3231     }
3232 
3233     ierr = PetscFree(garray);CHKERRQ(ierr);
3234     *submat = M;
3235 
3236     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3237     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3238     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3239 
3240     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3241     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3242 
3243     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3244     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3245   }
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3250 {
3251   PetscErrorCode ierr;
3252   IS             iscol_local=NULL,isrow_d;
3253   PetscInt       csize;
3254   PetscInt       n,i,j,start,end;
3255   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3256   MPI_Comm       comm;
3257 
3258   PetscFunctionBegin;
3259   /* If isrow has same processor distribution as mat,
3260      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3261   if (call == MAT_REUSE_MATRIX) {
3262     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3263     if (isrow_d) {
3264       sameRowDist  = PETSC_TRUE;
3265       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3266     } else {
3267       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3268       if (iscol_local) {
3269         sameRowDist  = PETSC_TRUE;
3270         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3271       }
3272     }
3273   } else {
3274     /* Check if isrow has same processor distribution as mat */
3275     sameDist[0] = PETSC_FALSE;
3276     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3277     if (!n) {
3278       sameDist[0] = PETSC_TRUE;
3279     } else {
3280       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3281       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3282       if (i >= start && j < end) {
3283         sameDist[0] = PETSC_TRUE;
3284       }
3285     }
3286 
3287     /* Check if iscol has same processor distribution as mat */
3288     sameDist[1] = PETSC_FALSE;
3289     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3290     if (!n) {
3291       sameDist[1] = PETSC_TRUE;
3292     } else {
3293       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3294       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3295       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3296     }
3297 
3298     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3299     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3300     sameRowDist = tsameDist[0];
3301   }
3302 
3303   if (sameRowDist) {
3304     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3305       /* isrow and iscol have same processor distribution as mat */
3306       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3307       PetscFunctionReturn(0);
3308     } else { /* sameRowDist */
3309       /* isrow has same processor distribution as mat */
3310       if (call == MAT_INITIAL_MATRIX) {
3311         PetscBool sorted;
3312         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3313         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3314         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3315         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3316 
3317         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3318         if (sorted) {
3319           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3320           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3321           PetscFunctionReturn(0);
3322         }
3323       } else { /* call == MAT_REUSE_MATRIX */
3324         IS    iscol_sub;
3325         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3326         if (iscol_sub) {
3327           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3328           PetscFunctionReturn(0);
3329         }
3330       }
3331     }
3332   }
3333 
3334   /* General case: iscol -> iscol_local which has global size of iscol */
3335   if (call == MAT_REUSE_MATRIX) {
3336     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3337     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3338   } else {
3339     if (!iscol_local) {
3340       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3341     }
3342   }
3343 
3344   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3345   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3346 
3347   if (call == MAT_INITIAL_MATRIX) {
3348     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3349     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3350   }
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 /*@C
3355      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3356          and "off-diagonal" part of the matrix in CSR format.
3357 
3358    Collective on MPI_Comm
3359 
3360    Input Parameters:
3361 +  comm - MPI communicator
3362 .  A - "diagonal" portion of matrix
3363 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3364 -  garray - global index of B columns
3365 
3366    Output Parameter:
3367 .   mat - the matrix, with input A as its local diagonal matrix
3368    Level: advanced
3369 
3370    Notes:
3371        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3372        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3373 
3374 .seealso: MatCreateMPIAIJWithSplitArrays()
3375 @*/
3376 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3377 {
3378   PetscErrorCode ierr;
3379   Mat_MPIAIJ     *maij;
3380   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3381   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3382   PetscScalar    *oa=b->a;
3383   Mat            Bnew;
3384   PetscInt       m,n,N;
3385 
3386   PetscFunctionBegin;
3387   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3388   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3389   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3390   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3391   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3392   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3393 
3394   /* Get global columns of mat */
3395   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3396 
3397   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3398   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3399   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3400   maij = (Mat_MPIAIJ*)(*mat)->data;
3401 
3402   (*mat)->preallocated = PETSC_TRUE;
3403 
3404   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3405   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3406 
3407   /* Set A as diagonal portion of *mat */
3408   maij->A = A;
3409 
3410   nz = oi[m];
3411   for (i=0; i<nz; i++) {
3412     col   = oj[i];
3413     oj[i] = garray[col];
3414   }
3415 
3416    /* Set Bnew as off-diagonal portion of *mat */
3417   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3418   bnew        = (Mat_SeqAIJ*)Bnew->data;
3419   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3420   maij->B     = Bnew;
3421 
3422   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3423 
3424   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3425   b->free_a       = PETSC_FALSE;
3426   b->free_ij      = PETSC_FALSE;
3427   ierr = MatDestroy(&B);CHKERRQ(ierr);
3428 
3429   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3430   bnew->free_a       = PETSC_TRUE;
3431   bnew->free_ij      = PETSC_TRUE;
3432 
3433   /* condense columns of maij->B */
3434   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3435   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3436   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3437   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3438   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3439   PetscFunctionReturn(0);
3440 }
3441 
3442 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3443 
3444 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3445 {
3446   PetscErrorCode ierr;
3447   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3448   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3449   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3450   Mat            M,Msub,B=a->B;
3451   MatScalar      *aa;
3452   Mat_SeqAIJ     *aij;
3453   PetscInt       *garray = a->garray,*colsub,Ncols;
3454   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3455   IS             iscol_sub,iscmap;
3456   const PetscInt *is_idx,*cmap;
3457   PetscBool      allcolumns=PETSC_FALSE;
3458   MPI_Comm       comm;
3459 
3460   PetscFunctionBegin;
3461   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3462 
3463   if (call == MAT_REUSE_MATRIX) {
3464     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3465     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3466     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3467 
3468     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3469     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3470 
3471     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3472     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3473 
3474     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3475 
3476   } else { /* call == MAT_INITIAL_MATRIX) */
3477     PetscBool flg;
3478 
3479     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3480     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3481 
3482     /* (1) iscol -> nonscalable iscol_local */
3483     /* Check for special case: each processor gets entire matrix columns */
3484     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3485     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3486     if (allcolumns) {
3487       iscol_sub = iscol_local;
3488       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3489       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3490 
3491     } else {
3492       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3493       PetscInt *idx,*cmap1,k;
3494       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3495       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3496       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3497       count = 0;
3498       k     = 0;
3499       for (i=0; i<Ncols; i++) {
3500         j = is_idx[i];
3501         if (j >= cstart && j < cend) {
3502           /* diagonal part of mat */
3503           idx[count]     = j;
3504           cmap1[count++] = i; /* column index in submat */
3505         } else if (Bn) {
3506           /* off-diagonal part of mat */
3507           if (j == garray[k]) {
3508             idx[count]     = j;
3509             cmap1[count++] = i;  /* column index in submat */
3510           } else if (j > garray[k]) {
3511             while (j > garray[k] && k < Bn-1) k++;
3512             if (j == garray[k]) {
3513               idx[count]     = j;
3514               cmap1[count++] = i; /* column index in submat */
3515             }
3516           }
3517         }
3518       }
3519       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3520 
3521       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3522       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3523       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3524 
3525       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3526     }
3527 
3528     /* (3) Create sequential Msub */
3529     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3530   }
3531 
3532   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3533   aij  = (Mat_SeqAIJ*)(Msub)->data;
3534   ii   = aij->i;
3535   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3536 
3537   /*
3538       m - number of local rows
3539       Ncols - number of columns (same on all processors)
3540       rstart - first row in new global matrix generated
3541   */
3542   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3543 
3544   if (call == MAT_INITIAL_MATRIX) {
3545     /* (4) Create parallel newmat */
3546     PetscMPIInt    rank,size;
3547     PetscInt       csize;
3548 
3549     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3550     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3551 
3552     /*
3553         Determine the number of non-zeros in the diagonal and off-diagonal
3554         portions of the matrix in order to do correct preallocation
3555     */
3556 
3557     /* first get start and end of "diagonal" columns */
3558     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3559     if (csize == PETSC_DECIDE) {
3560       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3561       if (mglobal == Ncols) { /* square matrix */
3562         nlocal = m;
3563       } else {
3564         nlocal = Ncols/size + ((Ncols % size) > rank);
3565       }
3566     } else {
3567       nlocal = csize;
3568     }
3569     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3570     rstart = rend - nlocal;
3571     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3572 
3573     /* next, compute all the lengths */
3574     jj    = aij->j;
3575     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3576     olens = dlens + m;
3577     for (i=0; i<m; i++) {
3578       jend = ii[i+1] - ii[i];
3579       olen = 0;
3580       dlen = 0;
3581       for (j=0; j<jend; j++) {
3582         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3583         else dlen++;
3584         jj++;
3585       }
3586       olens[i] = olen;
3587       dlens[i] = dlen;
3588     }
3589 
3590     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3591     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3592 
3593     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3594     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3595     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3596     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3597     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3598     ierr = PetscFree(dlens);CHKERRQ(ierr);
3599 
3600   } else { /* call == MAT_REUSE_MATRIX */
3601     M    = *newmat;
3602     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3603     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3604     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3605     /*
3606          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3607        rather than the slower MatSetValues().
3608     */
3609     M->was_assembled = PETSC_TRUE;
3610     M->assembled     = PETSC_FALSE;
3611   }
3612 
3613   /* (5) Set values of Msub to *newmat */
3614   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3615   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3616 
3617   jj   = aij->j;
3618   aa   = aij->a;
3619   for (i=0; i<m; i++) {
3620     row = rstart + i;
3621     nz  = ii[i+1] - ii[i];
3622     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3623     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3624     jj += nz; aa += nz;
3625   }
3626   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3627 
3628   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3629   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3630 
3631   ierr = PetscFree(colsub);CHKERRQ(ierr);
3632 
3633   /* save Msub, iscol_sub and iscmap used in processor for next request */
3634   if (call ==  MAT_INITIAL_MATRIX) {
3635     *newmat = M;
3636     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3637     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3638 
3639     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3640     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3641 
3642     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3643     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3644 
3645     if (iscol_local) {
3646       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3647       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3648     }
3649   }
3650   PetscFunctionReturn(0);
3651 }
3652 
3653 /*
3654     Not great since it makes two copies of the submatrix, first an SeqAIJ
3655   in local and then by concatenating the local matrices the end result.
3656   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3657 
3658   Note: This requires a sequential iscol with all indices.

       Input Parameters:
         mat    - parallel matrix the rows/columns are taken from; the result gets the same type name
         isrow  - rows to extract; handed unchanged to the local extraction routine
         iscol  - columns to extract; when it is the identity and its local length equals
                  mat->cmap->N the "all columns" fast path flag is passed to the local extraction
         csize  - number of local (diagonal block) columns of the result, or PETSC_DECIDE to
                  have this routine pick it
         call   - MAT_INITIAL_MATRIX to build *newmat, MAT_REUSE_MATRIX to refill a matrix
                  returned by an earlier MAT_INITIAL_MATRIX call of this routine

       Output Parameter:
         newmat - the parallel submatrix; after an initial call it carries the sequential
                  submatrix as a composed object under the key "SubMatrix"
3659 */
3660 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3661 {
3662   PetscErrorCode ierr;
3663   PetscMPIInt    rank,size;
3664   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3665   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3666   Mat            M,Mreuse;
3667   MatScalar      *aa,*vwork;
3668   MPI_Comm       comm;
3669   Mat_SeqAIJ     *aij;
3670   PetscBool      colflag,allcolumns=PETSC_FALSE;
3671 
3672   PetscFunctionBegin;
3673   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3674   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3676 
3677   /* Check for special case: each processor gets entire matrix columns */
3678   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
       /* n holds the local length of iscol only until MatGetSize() below overwrites it */
3679   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3680   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3681 
       /* Step 1: obtain the sequential submatrix Mreuse holding this process's rows */
3682   if (call ==  MAT_REUSE_MATRIX) {
         /* the initial call stored the sequential submatrix on *newmat under "SubMatrix" (see the end of this routine) */
3683     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3684     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3685     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3686   } else {
3687     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3688   }
3689 
3690   /*
3691       m - number of local rows
3692       n - number of columns (same on all processors)
3693       rstart - first row in new global matrix generated
3694   */
3695   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3696   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3697   if (call == MAT_INITIAL_MATRIX) {
         /* Step 2a: create and preallocate the parallel result from the CSR structure of Mreuse */
3698     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3699     ii  = aij->i;
3700     jj  = aij->j;
3701 
3702     /*
3703         Determine the number of non-zeros in the diagonal and off-diagonal
3704         portions of the matrix in order to do correct preallocation
3705     */
3706 
3707     /* first get start and end of "diagonal" columns */
3708     if (csize == PETSC_DECIDE) {
3709       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3710       if (mglobal == n) { /* square matrix */
3711         nlocal = m;
3712       } else {
             /* spread n columns over the ranks; the first (n % size) ranks get one extra */
3713         nlocal = n/size + ((n % size) > rank);
3714       }
3715     } else {
3716       nlocal = csize;
3717     }
         /* running sum of nlocal over the ranks: [rstart,rend) is this process's diagonal COLUMN range */
3718     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3719     rstart = rend - nlocal;
         /* on the last rank the running sum is the total, which must equal the global column count */
3720     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3721 
3722     /* next, compute all the lengths */
         /* dlens and olens share one allocation: olens starts m entries into dlens */
3723     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3724     olens = dlens + m;
3725     for (i=0; i<m; i++) {
3726       jend = ii[i+1] - ii[i];
3727       olen = 0;
3728       dlen = 0;
           /* jj is advanced through the column indices of all rows, so it is not reset per row */
3729       for (j=0; j<jend; j++) {
3730         if (*jj < rstart || *jj >= rend) olen++;
3731         else dlen++;
3732         jj++;
3733       }
3734       olens[i] = olen;
3735       dlens[i] = dlen;
3736     }
3737     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3738     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3739     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3740     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3741     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3742     ierr = PetscFree(dlens);CHKERRQ(ierr);
3743   } else {
         /* Step 2b: reuse the caller's matrix; only its local row count is checked against the request (nl is not used) */
3744     PetscInt ml,nl;
3745 
3746     M    = *newmat;
3747     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3748     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3749     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3750     /*
3751          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3752        rather than the slower MatSetValues().
3753     */
3754     M->was_assembled = PETSC_TRUE;
3755     M->assembled     = PETSC_FALSE;
3756   }
       /* Step 3: copy Mreuse into M row by row.  From here on rstart/rend are the ROW ownership
          range of M (in the initial-matrix branch above they held the diagonal column range). */
3757   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3758   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3759   ii   = aij->i;
3760   jj   = aij->j;
3761   aa   = aij->a;
3762   for (i=0; i<m; i++) {
3763     row   = rstart + i;
3764     nz    = ii[i+1] - ii[i];
         /* the row's column indices and values are passed straight out of Mreuse's CSR arrays */
3765     cwork = jj;     jj += nz;
3766     vwork = aa;     aa += nz;
3767     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3768   }
3769 
3770   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3771   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772   *newmat = M;
3773 
3774   /* save submatrix used in processor for next request */
3775   if (call ==  MAT_INITIAL_MATRIX) {
3776     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
         /* NOTE(review): this drops the local handle; presumably the composition above holds its own
            reference so Mreuse stays alive with M - confirm against PetscObjectCompose() */
3777     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3778   }
3779   PetscFunctionReturn(0);
3780 }
3781 
/*
     MatMPIAIJSetPreallocationCSR_MPIAIJ - Preallocates B from local CSR arrays, then inserts
   and assembles the listed entries.

   Input Parameters:
+  B  - the matrix; its row and column layouts are set up by this routine
.  Ii - offsets into J of the start of each local row; Ii[0] must be 0
.  J  - column indices, row after row; they are compared against the column layout of B
-  v  - values in the same order as J, or NULL to insert zeros at every listed location

   Notes:
   Per local row, indices inside the column ownership range [cstart,cend) are counted for the
   diagonal block and all remaining indices for the off-diagonal block.

   When PETSC_USE_DEBUG is defined every row is additionally checked for a negative length,
   a negative first column index and a last column index >= the global number of columns.
   Only the first and the last index of a row are examined.

   On return MAT_NEW_NONZERO_LOCATION_ERR has been set to PETSC_TRUE on B.
*/
3782 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3783 {
3784   PetscInt       m,cstart, cend,j,nnz,i,d;
3785   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3786   const PetscInt *JJ;
3787   PetscScalar    *values;
3788   PetscErrorCode ierr;
3789   PetscBool      nooffprocentries;
3790 
3791   PetscFunctionBegin;
3792   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3793 
3794   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3795   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3796   m      = B->rmap->n;
3797   cstart = B->cmap->rstart;
3798   cend   = B->cmap->rend;
3799   rstart = B->rmap->rstart;
3800 
3801   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3802 
       /* debug-build-only validation of the caller's CSR arrays */
3803 #if defined(PETSC_USE_DEBUG)
3804   for (i=0; i<m; i++) {
3805     nnz = Ii[i+1]- Ii[i];
3806     JJ  = J + Ii[i];
3807     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3808     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3809     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3810   }
3811 #endif
3812 
       /* count, per local row, the entries falling in the diagonal and off-diagonal blocks;
          nnz_max (the longest row) sizes the zero buffer used below when v is NULL */
3813   for (i=0; i<m; i++) {
3814     nnz     = Ii[i+1]- Ii[i];
3815     JJ      = J + Ii[i];
3816     nnz_max = PetscMax(nnz_max,nnz);
3817     d       = 0;
3818     for (j=0; j<nnz; j++) {
3819       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3820     }
3821     d_nnz[i] = d;
3822     o_nnz[i] = nnz - d;
3823   }
3824   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3825   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3826 
       /* without caller values, one zero-filled buffer long enough for any row is reused for every row */
3827   if (v) values = (PetscScalar*)v;
3828   else {
3829     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3830   }
3831 
3832   for (i=0; i<m; i++) {
3833     ii   = i + rstart;
3834     nnz  = Ii[i+1]- Ii[i];
         /* caller values are offset by the row start; the zero buffer is always read from its beginning */
3835     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3836   }
       /* every row inserted above is rstart+i, i.e. locally owned, so the flag is forced on for this
          assembly only and the caller's setting is restored afterwards */
3837   nooffprocentries    = B->nooffprocentries;
3838   B->nooffprocentries = PETSC_TRUE;
3839   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3840   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3841   B->nooffprocentries = nooffprocentries;
3842 
3843   if (!v) {
3844     ierr = PetscFree(values);CHKERRQ(ierr);
3845   }
3846   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3847   PetscFunctionReturn(0);
3848 }
3849 
3850 /*@
3851    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3852    (the default parallel PETSc format).
3853 
3854    Collective on MPI_Comm
3855 
3856    Input Parameters:
3857 +  B - the matrix
3858 .  i - the indices into j for the start of each local row (starts with zero)
3859 .  j - the column indices for each local row (starts with zero)
3860 -  v - optional values in the matrix
3861 
3862    Level: developer
3863 
3864    Notes:
3865        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3866      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3867      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3868 
3869        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3870 
3871        The format which is used for the sparse matrix input, is equivalent to a
3872     row-major ordering.. i.e for the following matrix, the input data expected is
3873     as shown
3874 
3875 $        1 0 0
3876 $        2 0 3     P0
3877 $       -------
3878 $        4 5 6     P1
3879 $
3880 $     Process0 [P0]: rows_owned=[0,1]
3881 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3882 $        j =  {0,0,2}  [size = 3]
3883 $        v =  {1,2,3}  [size = 3]
3884 $
3885 $     Process1 [P1]: rows_owned=[2]
3886 $        i =  {0,3}    [size = nrow+1  = 1+1]
3887 $        j =  {0,1,2}  [size = 3]
3888 $        v =  {4,5,6}  [size = 3]
3889 
3890 .keywords: matrix, aij, compressed row, sparse, parallel
3891 
3892 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3893           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3894 @*/
3895 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3896 {
3897   PetscErrorCode ierr;
3898 
3899   PetscFunctionBegin;
       /* Forward the arguments unchanged to the method B has registered under the name
          "MatMPIAIJSetPreallocationCSR_C".  B is not validated here before the dispatch.
          NOTE(review): PetscTryMethod() presumably does nothing when no such method is composed - confirm */
3900   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 /*@C
3905    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3906    (the default parallel PETSc format).  For good matrix assembly performance
3907    the user should preallocate the matrix storage by setting the parameters
3908    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3909    performance can be increased by more than a factor of 50.
3910 
3911    Collective on MPI_Comm
3912 
3913    Input Parameters:
3914 +  B - the matrix
3915 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3916            (same value is used for all local rows)
3917 .  d_nnz - array containing the number of nonzeros in the various rows of the
3918            DIAGONAL portion of the local submatrix (possibly different for each row)
3919            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3920            The size of this array is equal to the number of local rows, i.e 'm'.
3921            For matrices that will be factored, you must leave room for (and set)
3922            the diagonal entry even if it is zero.
3923 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3924            submatrix (same value is used for all local rows).
3925 -  o_nnz - array containing the number of nonzeros in the various rows of the
3926            OFF-DIAGONAL portion of the local submatrix (possibly different for
3927            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3928            structure. The size of this array is equal to the number
3929            of local rows, i.e 'm'.
3930 
3931    If the *_nnz parameter is given then the *_nz parameter is ignored
3932 
3933    The AIJ format (also called the Yale sparse matrix format or
3934    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3935    storage.  The stored row and column indices begin with zero.
3936    See Users-Manual: ch_mat for details.
3937 
3938    The parallel matrix is partitioned such that the first m0 rows belong to
3939    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3940    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3941 
3942    The DIAGONAL portion of the local submatrix of a processor can be defined
3943    as the submatrix which is obtained by extracting the part corresponding to
3944    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3945    first row that belongs to the processor, r2 is the last row belonging to
3946    this processor, and c1-c2 is the range of indices of the local part of a
3947    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3948    common case of a square matrix, the row and column ranges are the same and
3949    the DIAGONAL part is also square. The remaining portion of the local
3950    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3951 
3952    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3953 
3954    You can call MatGetInfo() to get information on how effective the preallocation was;
3955    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3956    You can also run with the option -info and look for messages with the string
3957    malloc in them to see if additional memory allocation was needed.
3958 
3959    Example usage:
3960 
3961    Consider the following 8x8 matrix with 34 non-zero values, that is
3962    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3963    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3964    as follows:
3965 
3966 .vb
3967             1  2  0  |  0  3  0  |  0  4
3968     Proc0   0  5  6  |  7  0  0  |  8  0
3969             9  0 10  | 11  0  0  | 12  0
3970     -------------------------------------
3971            13  0 14  | 15 16 17  |  0  0
3972     Proc1   0 18  0  | 19 20 21  |  0  0
3973             0  0  0  | 22 23  0  | 24  0
3974     -------------------------------------
3975     Proc2  25 26 27  |  0  0 28  | 29  0
3976            30  0  0  | 31 32 33  |  0 34
3977 .ve
3978 
3979    This can be represented as a collection of submatrices as:
3980 
3981 .vb
3982       A B C
3983       D E F
3984       G H I
3985 .ve
3986 
3987    Where the submatrices A,B,C are owned by proc0, D,E,F are
3988    owned by proc1, G,H,I are owned by proc2.
3989 
3990    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3991    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'M','N' parameters are 8,8, and have the same values on all procs.
3993 
3994    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3995    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3996    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3997    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3998    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3999    matrix, and [DF] as another SeqAIJ matrix.
4000 
4001    When d_nz, o_nz parameters are specified, d_nz storage elements are
4002    allocated for every row of the local diagonal submatrix, and o_nz
4003    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4004    One way to choose d_nz and o_nz is to use the max nonzeros per local
4005    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4006    In this case, the values of d_nz,o_nz are:
4007 .vb
4008      proc0 : dnz = 2, o_nz = 2
4009      proc1 : dnz = 3, o_nz = 2
4010      proc2 : dnz = 1, o_nz = 4
4011 .ve
4012    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4013    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4014    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4015    34 values.
4016 
4017    When d_nnz, o_nnz parameters are specified, the storage is specified
4018    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4019    In the above case the values for d_nnz,o_nnz are:
4020 .vb
4021      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4022      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4023      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4024 .ve
4025    Here the space allocated is sum of all the above values i.e 34, and
4026    hence pre-allocation is perfect.
4027 
4028    Level: intermediate
4029 
4030 .keywords: matrix, aij, compressed row, sparse, parallel
4031 
4032 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4033           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4034 @*/
4035 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4036 {
4037   PetscErrorCode ierr;
4038 
4039   PetscFunctionBegin;
       /* argument 1 (B) is checked to be a Mat with its type set before anything is dispatched */
4040   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4041   PetscValidType(B,1);
       /* Forward the arguments unchanged to the method B has registered under the name
          "MatMPIAIJSetPreallocation_C".
          NOTE(review): PetscTryMethod() presumably does nothing when no such method is composed - confirm */
4042   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4043   PetscFunctionReturn(0);
4044 }
4045 
4046 /*@
4047      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4048          CSR format the local rows.
4049 
4050    Collective on MPI_Comm
4051 
4052    Input Parameters:
4053 +  comm - MPI communicator
4054 .  m - number of local rows (Cannot be PETSC_DECIDE)
4055 .  n - This value should be the same as the local size used in creating the
4056        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4057        calculated if N is given) For square matrices n is almost always m.
4058 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4059 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
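4060 .   i - row offsets into j for the local rows (length m+1, must start with 0)
4061 .   j - global column indices of the local rows, stored row by row
4062 -   a - matrix values, in the same order as j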
4063 
4064    Output Parameter:
4065 .   mat - the matrix
4066 
4067    Level: intermediate
4068 
4069    Notes:
4070        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4071      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4072      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4073 
4074        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4075 
4076        The format which is used for the sparse matrix input, is equivalent to a
4077     row-major ordering.. i.e for the following matrix, the input data expected is
4078     as shown
4079 
4080 $        1 0 0
4081 $        2 0 3     P0
4082 $       -------
4083 $        4 5 6     P1
4084 $
4085 $     Process0 [P0]: rows_owned=[0,1]
4086 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4087 $        j =  {0,0,2}  [size = 3]
4088 $        a =  {1,2,3}  [size = 3]
4089 $
4090 $     Process1 [P1]: rows_owned=[2]
4091 $        i =  {0,3}    [size = nrow+1  = 1+1]
4092 $        j =  {0,1,2}  [size = 3]
4093 $        a =  {4,5,6}  [size = 3]
4094 
4095 .keywords: matrix, aij, compressed row, sparse, parallel
4096 
4097 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4098           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4099 @*/
4100 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4101 {
4102   PetscErrorCode ierr;
4103 
4104   PetscFunctionBegin;
       /* argument checks come first so nothing is created on bad input; note i[0] is read before m is checked */
4105   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4106   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4107   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4108   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4109   /* no block sizes are set by this routine: a MatSetBlockSizes() call was disabled here, and no bs/cbs arguments are passed in */
4110   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
       /* the CSR routine does both the preallocation and the insertion/assembly of the values in a[] */
4111   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4112   PetscFunctionReturn(0);
4113 }
4114 
4115 /*@C
4116    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4117    (the default parallel PETSc format).  For good matrix assembly performance
4118    the user should preallocate the matrix storage by setting the parameters
4119    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4120    performance can be increased by more than a factor of 50.
4121 
4122    Collective on MPI_Comm
4123 
4124    Input Parameters:
4125 +  comm - MPI communicator
4126 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4127            This value should be the same as the local size used in creating the
4128            y vector for the matrix-vector product y = Ax.
4129 .  n - This value should be the same as the local size used in creating the
4130        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4131        calculated if N is given) For square matrices n is almost always m.
4132 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4133 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4134 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4135            (same value is used for all local rows)
4136 .  d_nnz - array containing the number of nonzeros in the various rows of the
4137            DIAGONAL portion of the local submatrix (possibly different for each row)
4138            or NULL, if d_nz is used to specify the nonzero structure.
4139            The size of this array is equal to the number of local rows, i.e 'm'.
4140 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4141            submatrix (same value is used for all local rows).
4142 -  o_nnz - array containing the number of nonzeros in the various rows of the
4143            OFF-DIAGONAL portion of the local submatrix (possibly different for
4144            each row) or NULL, if o_nz is used to specify the nonzero
4145            structure. The size of this array is equal to the number
4146            of local rows, i.e 'm'.
4147 
4148    Output Parameter:
4149 .  A - the matrix
4150 
4151    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4152    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4153    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4154 
4155    Notes:
4156    If the *_nnz parameter is given then the *_nz parameter is ignored
4157 
4158    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4159    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4160    storage requirements for this matrix.
4161 
4162    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4163    processor then it must be used on all processors that share the object for
4164    that argument.
4165 
4166    The user MUST specify either the local or global matrix dimensions
4167    (possibly both).
4168 
4169    The parallel matrix is partitioned across processors such that the
4170    first m0 rows belong to process 0, the next m1 rows belong to
4171    process 1, the next m2 rows belong to process 2 etc.. where
4172    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4173    values corresponding to [m x N] submatrix.
4174 
4175    The columns are logically partitioned with the n0 columns belonging
4176    to 0th partition, the next n1 columns belonging to the next
4177    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4178 
4179    The DIAGONAL portion of the local submatrix on any given processor
4180    is the submatrix corresponding to the rows and columns m,n
4181    corresponding to the given processor. i.e diagonal matrix on
4182    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4183    etc. The remaining portion of the local submatrix [m x (N-n)]
4184    constitute the OFF-DIAGONAL portion. The example below better
4185    illustrates this concept.
4186 
4187    For a square global matrix we define each processor's diagonal portion
4188    to be its local rows and the corresponding columns (a square submatrix);
4189    each processor's off-diagonal portion encompasses the remainder of the
4190    local matrix (a rectangular submatrix).
4191 
4192    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4193 
4194    When calling this routine with a single process communicator, a matrix of
4195    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4196    type of communicator, use the construction mechanism
4197 .vb
4198      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4199 .ve
4200 
4201 $     MatCreate(...,&A);
4202 $     MatSetType(A,MATMPIAIJ);
4203 $     MatSetSizes(A, m,n,M,N);
4204 $     MatMPIAIJSetPreallocation(A,...);
4205 
4206    By default, this format uses inodes (identical nodes) when possible.
4207    We search for consecutive rows with the same nonzero structure, thereby
4208    reusing matrix information to achieve increased efficiency.
4209 
4210    Options Database Keys:
4211 +  -mat_no_inode  - Do not use inodes
4212 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4213 -  -mat_aij_oneindex - Internally use indexing starting at 1
4214         rather than 0.  Note that when calling MatSetValues(),
4215         the user still MUST index entries starting at 0!
4216 
4217 
4218    Example usage:
4219 
4220    Consider the following 8x8 matrix with 34 non-zero values, that is
4221    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4222    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4223    as follows
4224 
4225 .vb
4226             1  2  0  |  0  3  0  |  0  4
4227     Proc0   0  5  6  |  7  0  0  |  8  0
4228             9  0 10  | 11  0  0  | 12  0
4229     -------------------------------------
4230            13  0 14  | 15 16 17  |  0  0
4231     Proc1   0 18  0  | 19 20 21  |  0  0
4232             0  0  0  | 22 23  0  | 24  0
4233     -------------------------------------
4234     Proc2  25 26 27  |  0  0 28  | 29  0
4235            30  0  0  | 31 32 33  |  0 34
4236 .ve
4237 
4238    This can be represented as a collection of submatrices as
4239 
4240 .vb
4241       A B C
4242       D E F
4243       G H I
4244 .ve
4245 
4246    Where the submatrices A,B,C are owned by proc0, D,E,F are
4247    owned by proc1, G,H,I are owned by proc2.
4248 
4249    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4250    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4251    The 'M','N' parameters are 8,8, and have the same values on all procs.
4252 
4253    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4254    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4255    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4256    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4257    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4258    matrix, and [DF] as another SeqAIJ matrix.
4259 
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
4266 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4270 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
4275 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
4279 .vb
4280      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4281      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4282      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4283 .ve
4284    Here the space allocated is sum of all the above values i.e 34, and
4285    hence pre-allocation is perfect.
4286 
4287    Level: intermediate
4288 
4289 .keywords: matrix, aij, compressed row, sparse, parallel
4290 
4291 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4292           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4293 @*/
4294 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4295 {
4296   PetscErrorCode ierr;
4297   PetscMPIInt    size;
4298 
4299   PetscFunctionBegin;
4300   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4301   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4302   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4303   if (size > 1) {
4304     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4305     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4306   } else {
4307     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4308     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4309   }
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4314 {
4315   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4316   PetscBool      flg;
4317   PetscErrorCode ierr;
4318 
4319   PetscFunctionBegin;
4320   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4321   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4322   if (Ad)     *Ad     = a->A;
4323   if (Ao)     *Ao     = a->B;
4324   if (colmap) *colmap = a->garray;
4325   PetscFunctionReturn(0);
4326 }
4327 
4328 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4329 {
4330   PetscErrorCode ierr;
4331   PetscInt       m,N,i,rstart,nnz,Ii;
4332   PetscInt       *indx;
4333   PetscScalar    *values;
4334 
4335   PetscFunctionBegin;
4336   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4337   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4338     PetscInt       *dnz,*onz,sum,bs,cbs;
4339 
4340     if (n == PETSC_DECIDE) {
4341       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4342     }
4343     /* Check sum(n) = N */
4344     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4345     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4346 
4347     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4348     rstart -= m;
4349 
4350     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4351     for (i=0; i<m; i++) {
4352       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4353       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4354       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4355     }
4356 
4357     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4358     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4359     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4360     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4361     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4362     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4363     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4364     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4365   }
4366 
4367   /* numeric phase */
4368   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4369   for (i=0; i<m; i++) {
4370     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4371     Ii   = i + rstart;
4372     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4373     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4374   }
4375   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4376   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4381 {
4382   PetscErrorCode    ierr;
4383   PetscMPIInt       rank;
4384   PetscInt          m,N,i,rstart,nnz;
4385   size_t            len;
4386   const PetscInt    *indx;
4387   PetscViewer       out;
4388   char              *name;
4389   Mat               B;
4390   const PetscScalar *values;
4391 
4392   PetscFunctionBegin;
4393   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4394   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4395   /* Should this be the type of the diagonal block of A? */
4396   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4397   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4398   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4399   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4400   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4401   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4402   for (i=0; i<m; i++) {
4403     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4404     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4405     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4406   }
4407   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4408   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4409 
4410   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4411   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4412   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4413   sprintf(name,"%s.%d",outfile,rank);
4414   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4415   ierr = PetscFree(name);CHKERRQ(ierr);
4416   ierr = MatView(B,out);CHKERRQ(ierr);
4417   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4418   ierr = MatDestroy(&B);CHKERRQ(ierr);
4419   PetscFunctionReturn(0);
4420 }
4421 
4422 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4423 {
4424   PetscErrorCode      ierr;
4425   Mat_Merge_SeqsToMPI *merge;
4426   PetscContainer      container;
4427 
4428   PetscFunctionBegin;
4429   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4430   if (container) {
4431     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4434     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4435     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4444     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4445     ierr = PetscFree(merge);CHKERRQ(ierr);
4446     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4447   }
4448   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4449   PetscFunctionReturn(0);
4450 }
4451 
4452 #include <../src/mat/utils/freespace.h>
4453 #include <petscbt.h>
4454 
4455 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4456 {
4457   PetscErrorCode      ierr;
4458   MPI_Comm            comm;
4459   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4460   PetscMPIInt         size,rank,taga,*len_s;
4461   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4462   PetscInt            proc,m;
4463   PetscInt            **buf_ri,**buf_rj;
4464   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4465   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4466   MPI_Request         *s_waits,*r_waits;
4467   MPI_Status          *status;
4468   MatScalar           *aa=a->a;
4469   MatScalar           **abuf_r,*ba_i;
4470   Mat_Merge_SeqsToMPI *merge;
4471   PetscContainer      container;
4472 
4473   PetscFunctionBegin;
4474   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4475   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4476 
4477   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4478   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4479 
4480   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4481   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4482 
4483   bi     = merge->bi;
4484   bj     = merge->bj;
4485   buf_ri = merge->buf_ri;
4486   buf_rj = merge->buf_rj;
4487 
4488   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4489   owners = merge->rowmap->range;
4490   len_s  = merge->len_s;
4491 
4492   /* send and recv matrix values */
4493   /*-----------------------------*/
4494   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4495   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4496 
4497   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4498   for (proc=0,k=0; proc<size; proc++) {
4499     if (!len_s[proc]) continue;
4500     i    = owners[proc];
4501     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4502     k++;
4503   }
4504 
4505   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4506   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4507   ierr = PetscFree(status);CHKERRQ(ierr);
4508 
4509   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4510   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4511 
4512   /* insert mat values of mpimat */
4513   /*----------------------------*/
4514   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4515   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4516 
4517   for (k=0; k<merge->nrecv; k++) {
4518     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4519     nrows       = *(buf_ri_k[k]);
4520     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4521     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4522   }
4523 
4524   /* set values of ba */
4525   m = merge->rowmap->n;
4526   for (i=0; i<m; i++) {
4527     arow = owners[rank] + i;
4528     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4529     bnzi = bi[i+1] - bi[i];
4530     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4531 
4532     /* add local non-zero vals of this proc's seqmat into ba */
4533     anzi   = ai[arow+1] - ai[arow];
4534     aj     = a->j + ai[arow];
4535     aa     = a->a + ai[arow];
4536     nextaj = 0;
4537     for (j=0; nextaj<anzi; j++) {
4538       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4539         ba_i[j] += aa[nextaj++];
4540       }
4541     }
4542 
4543     /* add received vals into ba */
4544     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4545       /* i-th row */
4546       if (i == *nextrow[k]) {
4547         anzi   = *(nextai[k]+1) - *nextai[k];
4548         aj     = buf_rj[k] + *(nextai[k]);
4549         aa     = abuf_r[k] + *(nextai[k]);
4550         nextaj = 0;
4551         for (j=0; nextaj<anzi; j++) {
4552           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4553             ba_i[j] += aa[nextaj++];
4554           }
4555         }
4556         nextrow[k]++; nextai[k]++;
4557       }
4558     }
4559     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4560   }
4561   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4562   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4563 
4564   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4565   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4566   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4567   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4568   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4573 {
4574   PetscErrorCode      ierr;
4575   Mat                 B_mpi;
4576   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4577   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4578   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4579   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4580   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4581   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4582   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4583   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4584   MPI_Status          *status;
4585   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4586   PetscBT             lnkbt;
4587   Mat_Merge_SeqsToMPI *merge;
4588   PetscContainer      container;
4589 
4590   PetscFunctionBegin;
4591   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4592 
4593   /* make sure it is a PETSc comm */
4594   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4596   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4597 
4598   ierr = PetscNew(&merge);CHKERRQ(ierr);
4599   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4600 
4601   /* determine row ownership */
4602   /*---------------------------------------------------------*/
4603   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4604   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4605   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4606   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4607   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4608   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4609   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4610 
4611   m      = merge->rowmap->n;
4612   owners = merge->rowmap->range;
4613 
4614   /* determine the number of messages to send, their lengths */
4615   /*---------------------------------------------------------*/
4616   len_s = merge->len_s;
4617 
4618   len          = 0; /* length of buf_si[] */
4619   merge->nsend = 0;
4620   for (proc=0; proc<size; proc++) {
4621     len_si[proc] = 0;
4622     if (proc == rank) {
4623       len_s[proc] = 0;
4624     } else {
4625       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4626       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4627     }
4628     if (len_s[proc]) {
4629       merge->nsend++;
4630       nrows = 0;
4631       for (i=owners[proc]; i<owners[proc+1]; i++) {
4632         if (ai[i+1] > ai[i]) nrows++;
4633       }
4634       len_si[proc] = 2*(nrows+1);
4635       len         += len_si[proc];
4636     }
4637   }
4638 
4639   /* determine the number and length of messages to receive for ij-structure */
4640   /*-------------------------------------------------------------------------*/
4641   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4642   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4643 
4644   /* post the Irecv of j-structure */
4645   /*-------------------------------*/
4646   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4647   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4648 
4649   /* post the Isend of j-structure */
4650   /*--------------------------------*/
4651   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4652 
4653   for (proc=0, k=0; proc<size; proc++) {
4654     if (!len_s[proc]) continue;
4655     i    = owners[proc];
4656     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4657     k++;
4658   }
4659 
4660   /* receives and sends of j-structure are complete */
4661   /*------------------------------------------------*/
4662   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4663   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4664 
4665   /* send and recv i-structure */
4666   /*---------------------------*/
4667   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4668   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4669 
4670   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4671   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4672   for (proc=0,k=0; proc<size; proc++) {
4673     if (!len_s[proc]) continue;
4674     /* form outgoing message for i-structure:
4675          buf_si[0]:                 nrows to be sent
4676                [1:nrows]:           row index (global)
4677                [nrows+1:2*nrows+1]: i-structure index
4678     */
4679     /*-------------------------------------------*/
4680     nrows       = len_si[proc]/2 - 1;
4681     buf_si_i    = buf_si + nrows+1;
4682     buf_si[0]   = nrows;
4683     buf_si_i[0] = 0;
4684     nrows       = 0;
4685     for (i=owners[proc]; i<owners[proc+1]; i++) {
4686       anzi = ai[i+1] - ai[i];
4687       if (anzi) {
4688         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4689         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4690         nrows++;
4691       }
4692     }
4693     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4694     k++;
4695     buf_si += len_si[proc];
4696   }
4697 
4698   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4699   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4700 
4701   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4702   for (i=0; i<merge->nrecv; i++) {
4703     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4704   }
4705 
4706   ierr = PetscFree(len_si);CHKERRQ(ierr);
4707   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4708   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4709   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4710   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4711   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4712   ierr = PetscFree(status);CHKERRQ(ierr);
4713 
4714   /* compute a local seq matrix in each processor */
4715   /*----------------------------------------------*/
4716   /* allocate bi array and free space for accumulating nonzero column info */
4717   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4718   bi[0] = 0;
4719 
4720   /* create and initialize a linked list */
4721   nlnk = N+1;
4722   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4723 
4724   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4725   len  = ai[owners[rank+1]] - ai[owners[rank]];
4726   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4727 
4728   current_space = free_space;
4729 
4730   /* determine symbolic info for each local row */
4731   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4732 
4733   for (k=0; k<merge->nrecv; k++) {
4734     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4735     nrows       = *buf_ri_k[k];
4736     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4737     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4738   }
4739 
4740   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4741   len  = 0;
4742   for (i=0; i<m; i++) {
4743     bnzi = 0;
4744     /* add local non-zero cols of this proc's seqmat into lnk */
4745     arow  = owners[rank] + i;
4746     anzi  = ai[arow+1] - ai[arow];
4747     aj    = a->j + ai[arow];
4748     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4749     bnzi += nlnk;
4750     /* add received col data into lnk */
4751     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4752       if (i == *nextrow[k]) { /* i-th row */
4753         anzi  = *(nextai[k]+1) - *nextai[k];
4754         aj    = buf_rj[k] + *nextai[k];
4755         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4756         bnzi += nlnk;
4757         nextrow[k]++; nextai[k]++;
4758       }
4759     }
4760     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4761 
4762     /* if free space is not available, make more free space */
4763     if (current_space->local_remaining<bnzi) {
4764       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4765       nspacedouble++;
4766     }
4767     /* copy data into free space, then initialize lnk */
4768     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4769     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4770 
4771     current_space->array           += bnzi;
4772     current_space->local_used      += bnzi;
4773     current_space->local_remaining -= bnzi;
4774 
4775     bi[i+1] = bi[i] + bnzi;
4776   }
4777 
4778   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4779 
4780   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4781   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4782   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4783 
4784   /* create symbolic parallel matrix B_mpi */
4785   /*---------------------------------------*/
4786   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4787   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4788   if (n==PETSC_DECIDE) {
4789     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4790   } else {
4791     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4792   }
4793   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4794   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4795   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4796   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4797   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4798 
4799   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4800   B_mpi->assembled    = PETSC_FALSE;
4801   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4802   merge->bi           = bi;
4803   merge->bj           = bj;
4804   merge->buf_ri       = buf_ri;
4805   merge->buf_rj       = buf_rj;
4806   merge->coi          = NULL;
4807   merge->coj          = NULL;
4808   merge->owners_co    = NULL;
4809 
4810   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4811 
4812   /* attach the supporting struct to B_mpi for reuse */
4813   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4814   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4815   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4816   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4817   *mpimat = B_mpi;
4818 
4819   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4820   PetscFunctionReturn(0);
4821 }
4822 
4823 /*@C
4824       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4825                  matrices from each processor
4826 
4827     Collective on MPI_Comm
4828 
4829    Input Parameters:
4830 +    comm - the communicators the parallel matrix will live on
4831 .    seqmat - the input sequential matrices
4832 .    m - number of local rows (or PETSC_DECIDE)
4833 .    n - number of local columns (or PETSC_DECIDE)
4834 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4835 
4836    Output Parameter:
4837 .    mpimat - the parallel matrix generated
4838 
4839     Level: advanced
4840 
4841    Notes:
4842      The dimensions of the sequential matrix in each processor MUST be the same.
4843      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4844      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4845 @*/
4846 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4847 {
4848   PetscErrorCode ierr;
4849   PetscMPIInt    size;
4850 
4851   PetscFunctionBegin;
4852   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4853   if (size == 1) {
4854     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4855     if (scall == MAT_INITIAL_MATRIX) {
4856       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4857     } else {
4858       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4859     }
4860     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4861     PetscFunctionReturn(0);
4862   }
4863   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4864   if (scall == MAT_INITIAL_MATRIX) {
4865     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4866   }
4867   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4868   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4869   PetscFunctionReturn(0);
4870 }
4871 
4872 /*@
4873      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4874           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4875           with MatGetSize()
4876 
4877     Not Collective
4878 
4879    Input Parameters:
4880 +    A - the matrix
4881 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4882 
4883    Output Parameter:
4884 .    A_loc - the local sequential matrix generated
4885 
4886     Level: developer
4887 
4888 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4889 
4890 @*/
4891 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4892 {
4893   PetscErrorCode ierr;
4894   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4895   Mat_SeqAIJ     *mat,*a,*b;
4896   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4897   MatScalar      *aa,*ba,*cam;
4898   PetscScalar    *ca;
4899   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4900   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4901   PetscBool      match;
4902   MPI_Comm       comm;
4903   PetscMPIInt    size;
4904 
4905   PetscFunctionBegin;
4906   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4907   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4908   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4910   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4911 
4912   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4913   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4914   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4915   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4916   aa = a->a; ba = b->a;
4917   if (scall == MAT_INITIAL_MATRIX) {
4918     if (size == 1) {
4919       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4920       PetscFunctionReturn(0);
4921     }
4922 
4923     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4924     ci[0] = 0;
4925     for (i=0; i<am; i++) {
4926       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4927     }
4928     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4929     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4930     k    = 0;
4931     for (i=0; i<am; i++) {
4932       ncols_o = bi[i+1] - bi[i];
4933       ncols_d = ai[i+1] - ai[i];
4934       /* off-diagonal portion of A */
4935       for (jo=0; jo<ncols_o; jo++) {
4936         col = cmap[*bj];
4937         if (col >= cstart) break;
4938         cj[k]   = col; bj++;
4939         ca[k++] = *ba++;
4940       }
4941       /* diagonal portion of A */
4942       for (j=0; j<ncols_d; j++) {
4943         cj[k]   = cstart + *aj++;
4944         ca[k++] = *aa++;
4945       }
4946       /* off-diagonal portion of A */
4947       for (j=jo; j<ncols_o; j++) {
4948         cj[k]   = cmap[*bj++];
4949         ca[k++] = *ba++;
4950       }
4951     }
4952     /* put together the new matrix */
4953     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4954     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4955     /* Since these are PETSc arrays, change flags to free them as necessary. */
4956     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4957     mat->free_a  = PETSC_TRUE;
4958     mat->free_ij = PETSC_TRUE;
4959     mat->nonew   = 0;
4960   } else if (scall == MAT_REUSE_MATRIX) {
4961     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4962     ci = mat->i; cj = mat->j; cam = mat->a;
4963     for (i=0; i<am; i++) {
4964       /* off-diagonal portion of A */
4965       ncols_o = bi[i+1] - bi[i];
4966       for (jo=0; jo<ncols_o; jo++) {
4967         col = cmap[*bj];
4968         if (col >= cstart) break;
4969         *cam++ = *ba++; bj++;
4970       }
4971       /* diagonal portion of A */
4972       ncols_d = ai[i+1] - ai[i];
4973       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4974       /* off-diagonal portion of A */
4975       for (j=jo; j<ncols_o; j++) {
4976         *cam++ = *ba++; bj++;
4977       }
4978     }
4979   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4980   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4981   PetscFunctionReturn(0);
4982 }
4983 
4984 /*@C
4985      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4986 
4987     Not Collective
4988 
4989    Input Parameters:
4990 +    A - the matrix
4991 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4992 -    row, col - index sets of rows and columns to extract (or NULL)
4993 
4994    Output Parameter:
4995 .    A_loc - the local sequential matrix generated
4996 
4997     Level: developer
4998 
4999 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5000 
5001 @*/
5002 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5003 {
5004   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5005   PetscErrorCode ierr;
5006   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5007   IS             isrowa,iscola;
5008   Mat            *aloc;
5009   PetscBool      match;
5010 
5011   PetscFunctionBegin;
5012   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5013   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5014   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5015   if (!row) {
5016     start = A->rmap->rstart; end = A->rmap->rend;
5017     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5018   } else {
5019     isrowa = *row;
5020   }
5021   if (!col) {
5022     start = A->cmap->rstart;
5023     cmap  = a->garray;
5024     nzA   = a->A->cmap->n;
5025     nzB   = a->B->cmap->n;
5026     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5027     ncols = 0;
5028     for (i=0; i<nzB; i++) {
5029       if (cmap[i] < start) idx[ncols++] = cmap[i];
5030       else break;
5031     }
5032     imark = i;
5033     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5034     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5035     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5036   } else {
5037     iscola = *col;
5038   }
5039   if (scall != MAT_INITIAL_MATRIX) {
5040     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5041     aloc[0] = *A_loc;
5042   }
5043   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5044   *A_loc = aloc[0];
5045   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5046   if (!row) {
5047     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5048   }
5049   if (!col) {
5050     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5051   }
5052   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5053   PetscFunctionReturn(0);
5054 }
5055 
5056 /*@C
5057     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5058 
5059     Collective on Mat
5060 
5061    Input Parameters:
5062 +    A,B - the matrices in mpiaij format
5063 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5064 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5065 
5066    Output Parameter:
5067 +    rowb, colb - index sets of rows and columns of B to extract
5068 -    B_seq - the sequential matrix generated
5069 
5070     Level: developer
5071 
5072 @*/
5073 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5074 {
5075   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5076   PetscErrorCode ierr;
5077   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5078   IS             isrowb,iscolb;
5079   Mat            *bseq=NULL;
5080 
5081   PetscFunctionBegin;
5082   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5083     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5084   }
5085   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5086 
5087   if (scall == MAT_INITIAL_MATRIX) {
5088     start = A->cmap->rstart;
5089     cmap  = a->garray;
5090     nzA   = a->A->cmap->n;
5091     nzB   = a->B->cmap->n;
5092     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5093     ncols = 0;
5094     for (i=0; i<nzB; i++) {  /* row < local row index */
5095       if (cmap[i] < start) idx[ncols++] = cmap[i];
5096       else break;
5097     }
5098     imark = i;
5099     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5100     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5101     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5102     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5103   } else {
5104     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5105     isrowb  = *rowb; iscolb = *colb;
5106     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5107     bseq[0] = *B_seq;
5108   }
5109   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5110   *B_seq = bseq[0];
5111   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5112   if (!rowb) {
5113     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5114   } else {
5115     *rowb = isrowb;
5116   }
5117   if (!colb) {
5118     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5119   } else {
5120     *colb = iscolb;
5121   }
5122   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5123   PetscFunctionReturn(0);
5124 }
5125 
5126 /*
5127     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5128     of the OFF-DIAGONAL portion of local A
5129 
5130     Collective on Mat
5131 
5132    Input Parameters:
5133 +    A,B - the matrices in mpiaij format
5134 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5135 
5136    Output Parameter:
5137 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5138 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5139 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5140 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5141 
5142     Level: developer
5143 
5144 */
5145 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5146 {
5147   VecScatter_MPI_General *gen_to,*gen_from;
5148   PetscErrorCode         ierr;
5149   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5150   Mat_SeqAIJ             *b_oth;
5151   VecScatter             ctx;
5152   MPI_Comm               comm;
5153   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5154   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5155   PetscInt               *rvalues,*svalues;
5156   MatScalar              *b_otha,*bufa,*bufA;
5157   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5158   MPI_Request            *rwaits = NULL,*swaits = NULL;
5159   MPI_Status             *sstatus,rstatus;
5160   PetscMPIInt            jj,size;
5161   PetscInt               *cols,sbs,rbs;
5162   PetscScalar            *vals;
5163 
5164   PetscFunctionBegin;
5165   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5167 
5168   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5169     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5170   }
5171   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5172   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5173 
5174   if (size == 1) {
5175     startsj_s = NULL;
5176     bufa_ptr  = NULL;
5177     *B_oth    = NULL;
5178     PetscFunctionReturn(0);
5179   }
5180 
5181   if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */
5182     a->Mvctx_mpi1_flg = PETSC_TRUE;
5183     ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5184   }
5185   ctx = a->Mvctx_mpi1;
5186   tag = ((PetscObject)ctx)->tag;
5187 
5188   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5189   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5190   nrecvs   = gen_from->n;
5191   nsends   = gen_to->n;
5192 
5193   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5194   srow    = gen_to->indices;    /* local row index to be sent */
5195   sstarts = gen_to->starts;
5196   sprocs  = gen_to->procs;
5197   sstatus = gen_to->sstatus;
5198   sbs     = gen_to->bs;
5199   rstarts = gen_from->starts;
5200   rprocs  = gen_from->procs;
5201   rbs     = gen_from->bs;
5202 
5203   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5204   if (scall == MAT_INITIAL_MATRIX) {
5205     /* i-array */
5206     /*---------*/
5207     /*  post receives */
5208     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5209     for (i=0; i<nrecvs; i++) {
5210       rowlen = rvalues + rstarts[i]*rbs;
5211       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5212       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5213     }
5214 
5215     /* pack the outgoing message */
5216     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5217 
5218     sstartsj[0] = 0;
5219     rstartsj[0] = 0;
5220     len         = 0; /* total length of j or a array to be sent */
5221     k           = 0;
5222     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5223     for (i=0; i<nsends; i++) {
5224       rowlen = svalues + sstarts[i]*sbs;
5225       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5226       for (j=0; j<nrows; j++) {
5227         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5228         for (l=0; l<sbs; l++) {
5229           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5230 
5231           rowlen[j*sbs+l] = ncols;
5232 
5233           len += ncols;
5234           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5235         }
5236         k++;
5237       }
5238       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5239 
5240       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5241     }
5242     /* recvs and sends of i-array are completed */
5243     i = nrecvs;
5244     while (i--) {
5245       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5246     }
5247     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5248     ierr = PetscFree(svalues);CHKERRQ(ierr);
5249 
5250     /* allocate buffers for sending j and a arrays */
5251     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5252     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5253 
5254     /* create i-array of B_oth */
5255     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5256 
5257     b_othi[0] = 0;
5258     len       = 0; /* total length of j or a array to be received */
5259     k         = 0;
5260     for (i=0; i<nrecvs; i++) {
5261       rowlen = rvalues + rstarts[i]*rbs;
5262       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5263       for (j=0; j<nrows; j++) {
5264         b_othi[k+1] = b_othi[k] + rowlen[j];
5265         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5266         k++;
5267       }
5268       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5269     }
5270     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5271 
5272     /* allocate space for j and a arrrays of B_oth */
5273     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5274     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5275 
5276     /* j-array */
5277     /*---------*/
5278     /*  post receives of j-array */
5279     for (i=0; i<nrecvs; i++) {
5280       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5281       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5282     }
5283 
5284     /* pack the outgoing message j-array */
5285     k = 0;
5286     for (i=0; i<nsends; i++) {
5287       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5288       bufJ  = bufj+sstartsj[i];
5289       for (j=0; j<nrows; j++) {
5290         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5291         for (ll=0; ll<sbs; ll++) {
5292           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5293           for (l=0; l<ncols; l++) {
5294             *bufJ++ = cols[l];
5295           }
5296           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5297         }
5298       }
5299       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5300     }
5301 
5302     /* recvs and sends of j-array are completed */
5303     i = nrecvs;
5304     while (i--) {
5305       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5306     }
5307     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5308   } else if (scall == MAT_REUSE_MATRIX) {
5309     sstartsj = *startsj_s;
5310     rstartsj = *startsj_r;
5311     bufa     = *bufa_ptr;
5312     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5313     b_otha   = b_oth->a;
5314   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5315 
5316   /* a-array */
5317   /*---------*/
5318   /*  post receives of a-array */
5319   for (i=0; i<nrecvs; i++) {
5320     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5321     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5322   }
5323 
5324   /* pack the outgoing message a-array */
5325   k = 0;
5326   for (i=0; i<nsends; i++) {
5327     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5328     bufA  = bufa+sstartsj[i];
5329     for (j=0; j<nrows; j++) {
5330       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5331       for (ll=0; ll<sbs; ll++) {
5332         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5333         for (l=0; l<ncols; l++) {
5334           *bufA++ = vals[l];
5335         }
5336         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5337       }
5338     }
5339     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5340   }
5341   /* recvs and sends of a-array are completed */
5342   i = nrecvs;
5343   while (i--) {
5344     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5345   }
5346   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5347   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5348 
5349   if (scall == MAT_INITIAL_MATRIX) {
5350     /* put together the new matrix */
5351     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5352 
5353     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5354     /* Since these are PETSc arrays, change flags to free them as necessary. */
5355     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5356     b_oth->free_a  = PETSC_TRUE;
5357     b_oth->free_ij = PETSC_TRUE;
5358     b_oth->nonew   = 0;
5359 
5360     ierr = PetscFree(bufj);CHKERRQ(ierr);
5361     if (!startsj_s || !bufa_ptr) {
5362       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5363       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5364     } else {
5365       *startsj_s = sstartsj;
5366       *startsj_r = rstartsj;
5367       *bufa_ptr  = bufa;
5368     }
5369   }
5370   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5371   PetscFunctionReturn(0);
5372 }
5373 
5374 /*@C
5375   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5376 
5377   Not Collective
5378 
5379   Input Parameters:
5380 . A - The matrix in mpiaij format
5381 
5382   Output Parameter:
5383 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5384 . colmap - A map from global column index to local index into lvec
5385 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5386 
5387   Level: developer
5388 
5389 @*/
5390 #if defined(PETSC_USE_CTABLE)
5391 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5392 #else
5393 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5394 #endif
5395 {
5396   Mat_MPIAIJ *a;
5397 
5398   PetscFunctionBegin;
5399   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5400   PetscValidPointer(lvec, 2);
5401   PetscValidPointer(colmap, 3);
5402   PetscValidPointer(multScatter, 4);
5403   a = (Mat_MPIAIJ*) A->data;
5404   if (lvec) *lvec = a->lvec;
5405   if (colmap) *colmap = a->colmap;
5406   if (multScatter) *multScatter = a->Mvctx;
5407   PetscFunctionReturn(0);
5408 }
5409 
5410 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5411 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5412 #if defined(PETSC_HAVE_MKL_SPARSE)
5413 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5414 #endif
5415 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5416 #if defined(PETSC_HAVE_ELEMENTAL)
5417 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5418 #endif
5419 #if defined(PETSC_HAVE_HYPRE)
5420 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5421 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5422 #endif
5423 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5425 
5426 /*
5427     Computes (B'*A')' since computing B*A directly is untenable
5428 
5429                n                       p                          p
5430         (              )       (              )         (                  )
5431       m (      A       )  *  n (       B      )   =   m (         C        )
5432         (              )       (              )         (                  )
5433 
5434 */
5435 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5436 {
5437   PetscErrorCode ierr;
5438   Mat            At,Bt,Ct;
5439 
5440   PetscFunctionBegin;
5441   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5442   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5443   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5444   ierr = MatDestroy(&At);CHKERRQ(ierr);
5445   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5446   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5447   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5448   PetscFunctionReturn(0);
5449 }
5450 
5451 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5452 {
5453   PetscErrorCode ierr;
5454   PetscInt       m=A->rmap->n,n=B->cmap->n;
5455   Mat            Cmat;
5456 
5457   PetscFunctionBegin;
5458   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5459   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5460   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5461   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5462   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5463   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5464   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5465   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5466 
5467   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5468 
5469   *C = Cmat;
5470   PetscFunctionReturn(0);
5471 }
5472 
5473 /* ----------------------------------------------------------------*/
5474 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5475 {
5476   PetscErrorCode ierr;
5477 
5478   PetscFunctionBegin;
5479   if (scall == MAT_INITIAL_MATRIX) {
5480     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5481     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5482     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5483   }
5484   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5485   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5486   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5487   PetscFunctionReturn(0);
5488 }
5489 
5490 /*MC
5491    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5492 
5493    Options Database Keys:
5494 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5495 
5496   Level: beginner
5497 
5498 .seealso: MatCreateAIJ()
5499 M*/
5500 
5501 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5502 {
5503   Mat_MPIAIJ     *b;
5504   PetscErrorCode ierr;
5505   PetscMPIInt    size;
5506 
5507   PetscFunctionBegin;
5508   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5509 
5510   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5511   B->data       = (void*)b;
5512   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5513   B->assembled  = PETSC_FALSE;
5514   B->insertmode = NOT_SET_VALUES;
5515   b->size       = size;
5516 
5517   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5518 
5519   /* build cache for off array entries formed */
5520   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5521 
5522   b->donotstash  = PETSC_FALSE;
5523   b->colmap      = 0;
5524   b->garray      = 0;
5525   b->roworiented = PETSC_TRUE;
5526 
5527   /* stuff used for matrix vector multiply */
5528   b->lvec  = NULL;
5529   b->Mvctx = NULL;
5530 
5531   /* stuff for MatGetRow() */
5532   b->rowindices   = 0;
5533   b->rowvalues    = 0;
5534   b->getrowactive = PETSC_FALSE;
5535 
5536   /* flexible pointer used in CUSP/CUSPARSE classes */
5537   b->spptr = NULL;
5538 
5539   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5540   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5548 #if defined(PETSC_HAVE_MKL_SPARSE)
5549   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5550 #endif
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5553 #if defined(PETSC_HAVE_ELEMENTAL)
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5555 #endif
5556 #if defined(PETSC_HAVE_HYPRE)
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5558 #endif
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5563   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5564 #if defined(PETSC_HAVE_HYPRE)
5565   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5566 #endif
5567   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5568   PetscFunctionReturn(0);
5569 }
5570 
5571 /*@C
5572      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5573          and "off-diagonal" part of the matrix in CSR format.
5574 
5575    Collective on MPI_Comm
5576 
5577    Input Parameters:
5578 +  comm - MPI communicator
5579 .  m - number of local rows (Cannot be PETSC_DECIDE)
5580 .  n - This value should be the same as the local size used in creating the
5581        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5582        calculated if N is given) For square matrices n is almost always m.
5583 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5584 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5585 .   i - row indices for "diagonal" portion of matrix
5586 .   j - column indices
5587 .   a - matrix values
5588 .   oi - row indices for "off-diagonal" portion of matrix
5589 .   oj - column indices
5590 -   oa - matrix values
5591 
5592    Output Parameter:
5593 .   mat - the matrix
5594 
5595    Level: advanced
5596 
5597    Notes:
5598        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5599        must free the arrays once the matrix has been destroyed and not before.
5600 
5601        The i and j indices are 0 based
5602 
5603        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5604 
5605        This sets local rows and cannot be used to set off-processor values.
5606 
5607        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5608        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5609        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5610        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5611        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5612        communication if it is known that only local entries will be set.
5613 
5614 .keywords: matrix, aij, compressed row, sparse, parallel
5615 
5616 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5617           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5618 @*/
5619 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5620 {
5621   PetscErrorCode ierr;
5622   Mat_MPIAIJ     *maij;
5623 
5624   PetscFunctionBegin;
5625   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5626   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5627   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5628   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5629   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5630   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5631   maij = (Mat_MPIAIJ*) (*mat)->data;
5632 
5633   (*mat)->preallocated = PETSC_TRUE;
5634 
5635   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5636   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5637 
5638   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5639   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5640 
5641   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5642   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5643   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5644   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5645 
5646   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5647   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5648   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5649   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5650   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5651   PetscFunctionReturn(0);
5652 }
5653 
5654 /*
5655     Special version for direct calls from Fortran
5656 */
5657 #include <petsc/private/fortranimpl.h>
5658 
5659 /* Change these macros so can be used in void function */
5660 #undef CHKERRQ
5661 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5662 #undef SETERRQ2
5663 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5664 #undef SETERRQ3
5665 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5666 #undef SETERRQ
5667 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5668 
5669 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5670 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5671 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5672 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5673 #else
5674 #endif
5675 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5676 {
5677   Mat            mat  = *mmat;
5678   PetscInt       m    = *mm, n = *mn;
5679   InsertMode     addv = *maddv;
5680   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5681   PetscScalar    value;
5682   PetscErrorCode ierr;
5683 
5684   MatCheckPreallocated(mat,1);
5685   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5686 
5687 #if defined(PETSC_USE_DEBUG)
5688   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5689 #endif
5690   {
5691     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5692     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5693     PetscBool roworiented = aij->roworiented;
5694 
5695     /* Some Variables required in the macro */
5696     Mat        A                 = aij->A;
5697     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5698     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5699     MatScalar  *aa               = a->a;
5700     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5701     Mat        B                 = aij->B;
5702     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5703     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5704     MatScalar  *ba               = b->a;
5705 
5706     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5707     PetscInt  nonew = a->nonew;
5708     MatScalar *ap1,*ap2;
5709 
5710     PetscFunctionBegin;
5711     for (i=0; i<m; i++) {
5712       if (im[i] < 0) continue;
5713 #if defined(PETSC_USE_DEBUG)
5714       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5715 #endif
5716       if (im[i] >= rstart && im[i] < rend) {
5717         row      = im[i] - rstart;
5718         lastcol1 = -1;
5719         rp1      = aj + ai[row];
5720         ap1      = aa + ai[row];
5721         rmax1    = aimax[row];
5722         nrow1    = ailen[row];
5723         low1     = 0;
5724         high1    = nrow1;
5725         lastcol2 = -1;
5726         rp2      = bj + bi[row];
5727         ap2      = ba + bi[row];
5728         rmax2    = bimax[row];
5729         nrow2    = bilen[row];
5730         low2     = 0;
5731         high2    = nrow2;
5732 
5733         for (j=0; j<n; j++) {
5734           if (roworiented) value = v[i*n+j];
5735           else value = v[i+j*m];
5736           if (in[j] >= cstart && in[j] < cend) {
5737             col = in[j] - cstart;
5738             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5739             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5740           } else if (in[j] < 0) continue;
5741 #if defined(PETSC_USE_DEBUG)
5742           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5743 #endif
5744           else {
5745             if (mat->was_assembled) {
5746               if (!aij->colmap) {
5747                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5748               }
5749 #if defined(PETSC_USE_CTABLE)
5750               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5751               col--;
5752 #else
5753               col = aij->colmap[in[j]] - 1;
5754 #endif
5755               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5756               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5757                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5758                 col  =  in[j];
5759                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5760                 B     = aij->B;
5761                 b     = (Mat_SeqAIJ*)B->data;
5762                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5763                 rp2   = bj + bi[row];
5764                 ap2   = ba + bi[row];
5765                 rmax2 = bimax[row];
5766                 nrow2 = bilen[row];
5767                 low2  = 0;
5768                 high2 = nrow2;
5769                 bm    = aij->B->rmap->n;
5770                 ba    = b->a;
5771               }
5772             } else col = in[j];
5773             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5774           }
5775         }
5776       } else if (!aij->donotstash) {
5777         if (roworiented) {
5778           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5779         } else {
5780           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5781         }
5782       }
5783     }
5784   }
5785   PetscFunctionReturnVoid();
5786 }
5787 
5788