xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 19b3b6edfb779801a913552352b4c9d2af9e2752)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also
   automatically switches over to using inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
28 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array but is fast to acess.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at local (row,col) of the matrix A.

   The macro is not self-contained.  It reads and updates these names from the expansion site:
   A, a, am, aa, ai, aj, aimax, ailen, rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, ii, N,
   nonew and ignorezeroentries.  orow and ocol are only used in the error message.

   The column is searched in rp1[low1..high1) by bisection followed by a linear scan.  An existing
   location is updated in place.  Otherwise, unless the value is skipped (zero off-diagonal value with
   ignorezeroentries, or nonew == 1) or rejected (nonew == -1), room is made through
   MatSeqXAIJReallocateAIJ(), the later entries of the row are shifted up and the new entry is stored.
   The row length ailen[row] is written back in every non-error case.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    /* keep one end of the previous search window, depending on where col lies */ \
    if (col <= lastcol1) { \
      low1 = 0; \
    } else { \
      high1 = nrow1; \
    } \
    lastcol1 = col; \
    /* bisection until at most five candidates remain */ \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) { \
        high1 = t; \
      } else { \
        low1 = t; \
      } \
    } \
    /* linear scan of the remaining candidates */ \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
        } else { \
          ap1[_i] = value; \
        } \
        goto a_noinsert; \
      } \
    } \
    /* location not present: decide whether a new nonzero may be created */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1 - 1; \
    nrow1++; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }
460 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value at local (row,col) of the matrix B.

   The macro is not self-contained.  It reads and updates these names from the expansion site:
   B, b, bm, ba, bi, bj, bimax, bilen, rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, ii, N,
   nonew and ignorezeroentries.  orow and ocol are only used in the error message.

   The column is searched in rp2[low2..high2) by bisection followed by a linear scan.  An existing
   location is updated in place.  Otherwise, unless the value is skipped (zero value with
   ignorezeroentries, or nonew == 1) or rejected (nonew == -1), room is made through
   MatSeqXAIJReallocateAIJ(), the later entries of the row are shifted up and the new entry is stored.
   The row length bilen[row] is written back in every non-error case.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    /* keep one end of the previous search window, depending on where col lies */ \
    if (col <= lastcol2) { \
      low2 = 0; \
    } else { \
      high2 = nrow2; \
    } \
    lastcol2 = col; \
    /* bisection until at most five candidates remain */ \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) { \
        high2 = t; \
      } else { \
        low2 = t; \
      } \
    } \
    /* linear scan of the remaining candidates */ \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
        } else { \
          ap2[_i] = value; \
        } \
        goto b_noinsert; \
      } \
    } \
    /* location not present: decide whether a new nonzero may be created */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2 - 1; \
    nrow2++; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938   VecScatter     Mvctx = a->Mvctx;
939 
940   PetscFunctionBegin;
941   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
942   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
943   if (a->Mvctx_mpi1_flg) {
944     Mvctx = a->Mvctx_mpi1;
945 #if 0
946     MPI_Comm comm;
947     ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
948     ierr = VecView(xx,PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);CHKERRQ(ierr);
949     ierr = VecSet(a->lvec,0.0);CHKERRQ(ierr);
950     ierr = VecView(a->lvec,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);CHKERRQ(ierr);
951 #endif
952   }
953   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
955   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
961 {
962   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
971 {
972   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
977   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
978   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
979   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987   PetscBool      merged;
988 
989   PetscFunctionBegin;
990   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
991   /* do nondiagonal part */
992   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
993   if (!merged) {
994     /* send it on its way */
995     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
996     /* do local part */
997     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
998     /* receive remote parts: note this assumes the values are not actually */
999     /* added in yy until the next line, */
1000     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1001   } else {
1002     /* do local part */
1003     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1004     /* send it on its way */
1005     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1006     /* values actually were received in the Begin() but we need to call this nop */
1007     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1013 {
1014   MPI_Comm       comm;
1015   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017   IS             Me,Notme;
1018   PetscErrorCode ierr;
1019   PetscInt       M,N,first,last,*notme,i;
1020   PetscMPIInt    size;
1021 
1022   PetscFunctionBegin;
1023   /* Easy test: symmetric diagonal block */
1024   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1026   if (!*f) PetscFunctionReturn(0);
1027   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1029   if (size == 1) PetscFunctionReturn(0);
1030 
1031   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1032   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1034   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1035   for (i=0; i<first; i++) notme[i] = i;
1036   for (i=last; i<M; i++) notme[i-last+first] = i;
1037   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1038   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1039   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1040   Aoff = Aoffs[0];
1041   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1042   Boff = Boffs[0];
1043   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1044   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1045   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1046   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1047   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1048   ierr = PetscFree(notme);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1053 {
1054   PetscErrorCode ierr;
1055 
1056   PetscFunctionBegin;
1057   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1062 {
1063   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1064   PetscErrorCode ierr;
1065 
1066   PetscFunctionBegin;
1067   /* do nondiagonal part */
1068   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1069   /* send it on its way */
1070   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1071   /* do local part */
1072   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1073   /* receive remote parts */
1074   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 /*
1079   This only works correctly for square matrices where the subblock A->A is the
1080    diagonal block
1081 */
1082 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1083 {
1084   PetscErrorCode ierr;
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086 
1087   PetscFunctionBegin;
1088   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1089   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1090   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1095 {
1096   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1101   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1106 {
1107   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111 #if defined(PETSC_USE_LOG)
1112   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1113 #endif
1114   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1115   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1116   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1117   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1118 #if defined(PETSC_USE_CTABLE)
1119   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1120 #else
1121   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1122 #endif
1123   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1124   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1125   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1126   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1127   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1128   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1129   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1130 
1131   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1140 #if defined(PETSC_HAVE_ELEMENTAL)
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1142 #endif
1143 #if defined(PETSC_HAVE_HYPRE)
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1146 #endif
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1151 {
1152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1153   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1154   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1155   PetscErrorCode ierr;
1156   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1157   int            fd;
1158   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1159   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1160   PetscScalar    *column_values;
1161   PetscInt       message_count,flowcontrolcount;
1162   FILE           *file;
1163 
1164   PetscFunctionBegin;
1165   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1166   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1167   nz   = A->nz + B->nz;
1168   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1169   if (!rank) {
1170     header[0] = MAT_FILE_CLASSID;
1171     header[1] = mat->rmap->N;
1172     header[2] = mat->cmap->N;
1173 
1174     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     /* get largest number of rows any processor has */
1177     rlen  = mat->rmap->n;
1178     range = mat->rmap->range;
1179     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1180   } else {
1181     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1182     rlen = mat->rmap->n;
1183   }
1184 
1185   /* load up the local row counts */
1186   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1187   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1188 
1189   /* store the row lengths to the file */
1190   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1191   if (!rank) {
1192     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1193     for (i=1; i<size; i++) {
1194       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1195       rlen = range[i+1] - range[i];
1196       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1198     }
1199     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1200   } else {
1201     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1202     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1204   }
1205   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1206 
1207   /* load up the local column indices */
1208   nzmax = nz; /* th processor needs space a largest processor needs */
1209   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1211   cnt   = 0;
1212   for (i=0; i<mat->rmap->n; i++) {
1213     for (j=B->i[i]; j<B->i[i+1]; j++) {
1214       if ((col = garray[B->j[j]]) > cstart) break;
1215       column_indices[cnt++] = col;
1216     }
1217     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1218     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1219   }
1220   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1221 
1222   /* store the column indices to the file */
1223   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     MPI_Status status;
1226     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     for (i=1; i<size; i++) {
1228       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1229       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1230       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1231       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1242 
1243   /* load up the local column values */
1244   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1245   cnt  = 0;
1246   for (i=0; i<mat->rmap->n; i++) {
1247     for (j=B->i[i]; j<B->i[i+1]; j++) {
1248       if (garray[B->j[j]] > cstart) break;
1249       column_values[cnt++] = B->a[j];
1250     }
1251     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1252     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1253   }
1254   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1255 
1256   /* store the column values to the file */
1257   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1258   if (!rank) {
1259     MPI_Status status;
1260     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1261     for (i=1; i<size; i++) {
1262       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1263       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1264       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1265       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1267     }
1268     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1269   } else {
1270     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1271     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1273     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1274   }
1275   ierr = PetscFree(column_values);CHKERRQ(ierr);
1276 
1277   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1278   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #include <petscdraw.h>
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank = aij->rank,size = aij->size;
1288   PetscBool         isdraw,iascii,isbinary;
1289   PetscViewer       sviewer;
1290   PetscViewerFormat format;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1296   if (iascii) {
1297     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1298     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1299       MatInfo   info;
1300       PetscBool inodes;
1301 
1302       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1303       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1304       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1306       if (!inodes) {
1307         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1308                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1309       } else {
1310         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1311                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1312       }
1313       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1315       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1317       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1318       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1320       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1321       PetscFunctionReturn(0);
1322     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1323       PetscInt inodecount,inodelimit,*inodes;
1324       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1325       if (inodes) {
1326         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1327       } else {
1328         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1329       }
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1332       PetscFunctionReturn(0);
1333     }
1334   } else if (isbinary) {
1335     if (size == 1) {
1336       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1337       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1340     }
1341     PetscFunctionReturn(0);
1342   } else if (isdraw) {
1343     PetscDraw draw;
1344     PetscBool isnull;
1345     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1346     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1347     if (isnull) PetscFunctionReturn(0);
1348   }
1349 
1350   {
1351     /* assemble the entire matrix onto first processor. */
1352     Mat        A;
1353     Mat_SeqAIJ *Aloc;
1354     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1355     MatScalar  *a;
1356 
1357     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1358     if (!rank) {
1359       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1360     } else {
1361       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1362     }
1363     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1364     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1365     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1366     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1367     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1368 
1369     /* copy over the A part */
1370     Aloc = (Mat_SeqAIJ*)aij->A->data;
1371     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1372     row  = mat->rmap->rstart;
1373     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1374     for (i=0; i<m; i++) {
1375       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1376       row++;
1377       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1378     }
1379     aj = Aloc->j;
1380     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1381 
1382     /* copy over the B part */
1383     Aloc = (Mat_SeqAIJ*)aij->B->data;
1384     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1385     row  = mat->rmap->rstart;
1386     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1387     ct   = cols;
1388     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1389     for (i=0; i<m; i++) {
1390       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1391       row++;
1392       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1393     }
1394     ierr = PetscFree(ct);CHKERRQ(ierr);
1395     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1396     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397     /*
1398        Everyone has to call to draw the matrix since the graphics waits are
1399        synchronized across all processors that share the PetscDraw object
1400     */
1401     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1402     if (!rank) {
1403       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1404       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1405     }
1406     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1407     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1408     ierr = MatDestroy(&A);CHKERRQ(ierr);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1414 {
1415   PetscErrorCode ierr;
1416   PetscBool      iascii,isdraw,issocket,isbinary;
1417 
1418   PetscFunctionBegin;
1419   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1420   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1421   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1422   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1423   if (iascii || isdraw || isbinary || issocket) {
1424     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1430 {
1431   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1432   PetscErrorCode ierr;
1433   Vec            bb1 = 0;
1434   PetscBool      hasop;
1435 
1436   PetscFunctionBegin;
1437   if (flag == SOR_APPLY_UPPER) {
1438     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1439     PetscFunctionReturn(0);
1440   }
1441 
1442   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1443     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1444   }
1445 
1446   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1447     if (flag & SOR_ZERO_INITIAL_GUESS) {
1448       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1449       its--;
1450     }
1451 
1452     while (its--) {
1453       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1454       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1458       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1459 
1460       /* local sweep */
1461       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1462     }
1463   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1464     if (flag & SOR_ZERO_INITIAL_GUESS) {
1465       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1466       its--;
1467     }
1468     while (its--) {
1469       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1470       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471 
1472       /* update rhs: bb1 = bb - B*x */
1473       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1474       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1475 
1476       /* local sweep */
1477       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1478     }
1479   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1480     if (flag & SOR_ZERO_INITIAL_GUESS) {
1481       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1482       its--;
1483     }
1484     while (its--) {
1485       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487 
1488       /* update rhs: bb1 = bb - B*x */
1489       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1491 
1492       /* local sweep */
1493       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1494     }
1495   } else if (flag & SOR_EISENSTAT) {
1496     Vec xx1;
1497 
1498     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1499     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1500 
1501     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503     if (!mat->diag) {
1504       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1505       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1506     }
1507     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1508     if (hasop) {
1509       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1510     } else {
1511       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1512     }
1513     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1514 
1515     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1516 
1517     /* local sweep */
1518     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1519     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1520     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1521   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1522 
1523   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1524 
1525   matin->factorerrortype = mat->A->factorerrortype;
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1530 {
1531   Mat            aA,aB,Aperm;
1532   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1533   PetscScalar    *aa,*ba;
1534   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1535   PetscSF        rowsf,sf;
1536   IS             parcolp = NULL;
1537   PetscBool      done;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1542   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1543   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1544   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1545 
1546   /* Invert row permutation to find out where my rows should go */
1547   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1548   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1549   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1550   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1551   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1552   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1553 
1554   /* Invert column permutation to find out where my columns should go */
1555   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1556   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1557   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1558   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1559   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1560   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1561   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1562 
1563   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1564   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1565   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1566 
1567   /* Find out where my gcols should go */
1568   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1569   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1570   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1571   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1572   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1573   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1574   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1575   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1576 
1577   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1578   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1579   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1580   for (i=0; i<m; i++) {
1581     PetscInt row = rdest[i],rowner;
1582     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1583     for (j=ai[i]; j<ai[i+1]; j++) {
1584       PetscInt cowner,col = cdest[aj[j]];
1585       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1586       if (rowner == cowner) dnnz[i]++;
1587       else onnz[i]++;
1588     }
1589     for (j=bi[i]; j<bi[i+1]; j++) {
1590       PetscInt cowner,col = gcdest[bj[j]];
1591       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1592       if (rowner == cowner) dnnz[i]++;
1593       else onnz[i]++;
1594     }
1595   }
1596   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1597   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1598   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1599   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1600   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1601 
1602   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1603   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1604   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1605   for (i=0; i<m; i++) {
1606     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1607     PetscInt j0,rowlen;
1608     rowlen = ai[i+1] - ai[i];
1609     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1610       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1611       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1612     }
1613     rowlen = bi[i+1] - bi[i];
1614     for (j0=j=0; j<rowlen; j0=j) {
1615       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1616       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1617     }
1618   }
1619   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1620   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1621   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1622   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1623   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1624   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1625   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1626   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1627   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1628   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1629   *B = Aperm;
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1634 {
1635   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1640   if (ghosts) *ghosts = aij->garray;
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1645 {
1646   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1647   Mat            A    = mat->A,B = mat->B;
1648   PetscErrorCode ierr;
1649   PetscReal      isend[5],irecv[5];
1650 
1651   PetscFunctionBegin;
1652   info->block_size = 1.0;
1653   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1654 
1655   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1656   isend[3] = info->memory;  isend[4] = info->mallocs;
1657 
1658   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1659 
1660   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1661   isend[3] += info->memory;  isend[4] += info->mallocs;
1662   if (flag == MAT_LOCAL) {
1663     info->nz_used      = isend[0];
1664     info->nz_allocated = isend[1];
1665     info->nz_unneeded  = isend[2];
1666     info->memory       = isend[3];
1667     info->mallocs      = isend[4];
1668   } else if (flag == MAT_GLOBAL_MAX) {
1669     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1670 
1671     info->nz_used      = irecv[0];
1672     info->nz_allocated = irecv[1];
1673     info->nz_unneeded  = irecv[2];
1674     info->memory       = irecv[3];
1675     info->mallocs      = irecv[4];
1676   } else if (flag == MAT_GLOBAL_SUM) {
1677     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1678 
1679     info->nz_used      = irecv[0];
1680     info->nz_allocated = irecv[1];
1681     info->nz_unneeded  = irecv[2];
1682     info->memory       = irecv[3];
1683     info->mallocs      = irecv[4];
1684   }
1685   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1686   info->fill_ratio_needed = 0;
1687   info->factor_mallocs    = 0;
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1692 {
1693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   switch (op) {
1698   case MAT_NEW_NONZERO_LOCATIONS:
1699   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1700   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1701   case MAT_KEEP_NONZERO_PATTERN:
1702   case MAT_NEW_NONZERO_LOCATION_ERR:
1703   case MAT_USE_INODES:
1704   case MAT_IGNORE_ZERO_ENTRIES:
1705     MatCheckPreallocated(A,1);
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_ROW_ORIENTED:
1710     MatCheckPreallocated(A,1);
1711     a->roworiented = flg;
1712 
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1715     break;
1716   case MAT_NEW_DIAGONALS:
1717     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1718     break;
1719   case MAT_IGNORE_OFF_PROC_ENTRIES:
1720     a->donotstash = flg;
1721     break;
1722   case MAT_SPD:
1723     A->spd_set = PETSC_TRUE;
1724     A->spd     = flg;
1725     if (flg) {
1726       A->symmetric                  = PETSC_TRUE;
1727       A->structurally_symmetric     = PETSC_TRUE;
1728       A->symmetric_set              = PETSC_TRUE;
1729       A->structurally_symmetric_set = PETSC_TRUE;
1730     }
1731     break;
1732   case MAT_SYMMETRIC:
1733     MatCheckPreallocated(A,1);
1734     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1735     break;
1736   case MAT_STRUCTURALLY_SYMMETRIC:
1737     MatCheckPreallocated(A,1);
1738     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_HERMITIAN:
1741     MatCheckPreallocated(A,1);
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_SYMMETRY_ETERNAL:
1745     MatCheckPreallocated(A,1);
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_SUBMAT_SINGLEIS:
1749     A->submat_singleis = flg;
1750     break;
1751   case MAT_STRUCTURE_ONLY:
1752     /* The option is handled directly by MatSetOption() */
1753     break;
1754   default:
1755     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1761 {
1762   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1763   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1764   PetscErrorCode ierr;
1765   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1766   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1767   PetscInt       *cmap,*idx_p;
1768 
1769   PetscFunctionBegin;
1770   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1771   mat->getrowactive = PETSC_TRUE;
1772 
1773   if (!mat->rowvalues && (idx || v)) {
1774     /*
1775         allocate enough space to hold information from the longest row.
1776     */
1777     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1778     PetscInt   max = 1,tmp;
1779     for (i=0; i<matin->rmap->n; i++) {
1780       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1781       if (max < tmp) max = tmp;
1782     }
1783     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1784   }
1785 
1786   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1787   lrow = row - rstart;
1788 
1789   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1790   if (!v)   {pvA = 0; pvB = 0;}
1791   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1792   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1793   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1794   nztot = nzA + nzB;
1795 
1796   cmap = mat->garray;
1797   if (v  || idx) {
1798     if (nztot) {
1799       /* Sort by increasing column numbers, assuming A and B already sorted */
1800       PetscInt imark = -1;
1801       if (v) {
1802         *v = v_p = mat->rowvalues;
1803         for (i=0; i<nzB; i++) {
1804           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1805           else break;
1806         }
1807         imark = i;
1808         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1809         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1810       }
1811       if (idx) {
1812         *idx = idx_p = mat->rowindices;
1813         if (imark > -1) {
1814           for (i=0; i<imark; i++) {
1815             idx_p[i] = cmap[cworkB[i]];
1816           }
1817         } else {
1818           for (i=0; i<nzB; i++) {
1819             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1820             else break;
1821           }
1822           imark = i;
1823         }
1824         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1825         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1826       }
1827     } else {
1828       if (idx) *idx = 0;
1829       if (v)   *v   = 0;
1830     }
1831   }
1832   *nz  = nztot;
1833   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1834   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1839 {
1840   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1841 
1842   PetscFunctionBegin;
1843   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1844   aij->getrowactive = PETSC_FALSE;
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1849 {
1850   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1851   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1852   PetscErrorCode ierr;
1853   PetscInt       i,j,cstart = mat->cmap->rstart;
1854   PetscReal      sum = 0.0;
1855   MatScalar      *v;
1856 
1857   PetscFunctionBegin;
1858   if (aij->size == 1) {
1859     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1860   } else {
1861     if (type == NORM_FROBENIUS) {
1862       v = amat->a;
1863       for (i=0; i<amat->nz; i++) {
1864         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1865       }
1866       v = bmat->a;
1867       for (i=0; i<bmat->nz; i++) {
1868         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1869       }
1870       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1871       *norm = PetscSqrtReal(*norm);
1872       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1873     } else if (type == NORM_1) { /* max column norm */
1874       PetscReal *tmp,*tmp2;
1875       PetscInt  *jj,*garray = aij->garray;
1876       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1877       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1878       *norm = 0.0;
1879       v     = amat->a; jj = amat->j;
1880       for (j=0; j<amat->nz; j++) {
1881         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1882       }
1883       v = bmat->a; jj = bmat->j;
1884       for (j=0; j<bmat->nz; j++) {
1885         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1886       }
1887       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1888       for (j=0; j<mat->cmap->N; j++) {
1889         if (tmp2[j] > *norm) *norm = tmp2[j];
1890       }
1891       ierr = PetscFree(tmp);CHKERRQ(ierr);
1892       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1893       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1894     } else if (type == NORM_INFINITY) { /* max row norm */
1895       PetscReal ntemp = 0.0;
1896       for (j=0; j<aij->A->rmap->n; j++) {
1897         v   = amat->a + amat->i[j];
1898         sum = 0.0;
1899         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1900           sum += PetscAbsScalar(*v); v++;
1901         }
1902         v = bmat->a + bmat->i[j];
1903         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1904           sum += PetscAbsScalar(*v); v++;
1905         }
1906         if (sum > ntemp) ntemp = sum;
1907       }
1908       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1909       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1910     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1916 {
1917   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1918   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1919   PetscErrorCode ierr;
1920   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1921   PetscInt       cstart = A->cmap->rstart,ncol;
1922   Mat            B;
1923   MatScalar      *array;
1924 
1925   PetscFunctionBegin;
1926   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1927 
1928   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1929   ai = Aloc->i; aj = Aloc->j;
1930   bi = Bloc->i; bj = Bloc->j;
1931   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1932     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1933     PetscSFNode          *oloc;
1934     PETSC_UNUSED PetscSF sf;
1935 
1936     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1937     /* compute d_nnz for preallocation */
1938     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1939     for (i=0; i<ai[ma]; i++) {
1940       d_nnz[aj[i]]++;
1941       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1942     }
1943     /* compute local off-diagonal contributions */
1944     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1945     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1946     /* map those to global */
1947     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1948     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1949     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1950     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1951     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1952     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1953     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1954 
1955     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1956     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1957     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1958     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1959     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1960     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1961   } else {
1962     B    = *matout;
1963     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1964     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1965   }
1966 
1967   /* copy over the A part */
1968   array = Aloc->a;
1969   row   = A->rmap->rstart;
1970   for (i=0; i<ma; i++) {
1971     ncol = ai[i+1]-ai[i];
1972     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1973     row++;
1974     array += ncol; aj += ncol;
1975   }
1976   aj = Aloc->j;
1977   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1978 
1979   /* copy over the B part */
1980   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1981   array = Bloc->a;
1982   row   = A->rmap->rstart;
1983   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1984   cols_tmp = cols;
1985   for (i=0; i<mb; i++) {
1986     ncol = bi[i+1]-bi[i];
1987     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1988     row++;
1989     array += ncol; cols_tmp += ncol;
1990   }
1991   ierr = PetscFree(cols);CHKERRQ(ierr);
1992 
1993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1996     *matout = B;
1997   } else {
1998     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2004 {
2005   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2006   Mat            a    = aij->A,b = aij->B;
2007   PetscErrorCode ierr;
2008   PetscInt       s1,s2,s3;
2009 
2010   PetscFunctionBegin;
2011   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2012   if (rr) {
2013     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2014     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2015     /* Overlap communication with computation. */
2016     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2017   }
2018   if (ll) {
2019     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2020     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2021     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2022   }
2023   /* scale  the diagonal block */
2024   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2025 
2026   if (rr) {
2027     /* Do a scatter end and then right scale the off-diagonal block */
2028     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2029     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2030   }
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2035 {
2036   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2037   PetscErrorCode ierr;
2038 
2039   PetscFunctionBegin;
2040   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2045 {
2046   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2047   Mat            a,b,c,d;
2048   PetscBool      flg;
2049   PetscErrorCode ierr;
2050 
2051   PetscFunctionBegin;
2052   a = matA->A; b = matA->B;
2053   c = matB->A; d = matB->B;
2054 
2055   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2056   if (flg) {
2057     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2058   }
2059   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2064 {
2065   PetscErrorCode ierr;
2066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2067   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2068 
2069   PetscFunctionBegin;
2070   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2071   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2072     /* because of the column compression in the off-processor part of the matrix a->B,
2073        the number of columns in a->B and b->B may be different, hence we cannot call
2074        the MatCopy() directly on the two parts. If need be, we can provide a more
2075        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2076        then copying the submatrices */
2077     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2078   } else {
2079     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2080     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2081   }
2082   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2087 {
2088   PetscErrorCode ierr;
2089 
2090   PetscFunctionBegin;
2091   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 /*
2096    Computes the number of nonzeros per row needed for preallocation when X and Y
2097    have different nonzero structure.
2098 */
2099 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2100 {
2101   PetscInt       i,j,k,nzx,nzy;
2102 
2103   PetscFunctionBegin;
2104   /* Set the number of nonzeros in the new matrix */
2105   for (i=0; i<m; i++) {
2106     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2107     nzx = xi[i+1] - xi[i];
2108     nzy = yi[i+1] - yi[i];
2109     nnz[i] = 0;
2110     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2111       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2112       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2113       nnz[i]++;
2114     }
2115     for (; k<nzy; k++) nnz[i]++;
2116   }
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2121 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2122 {
2123   PetscErrorCode ierr;
2124   PetscInt       m = Y->rmap->N;
2125   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2126   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2127 
2128   PetscFunctionBegin;
2129   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2134 {
2135   PetscErrorCode ierr;
2136   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2137   PetscBLASInt   bnz,one=1;
2138   Mat_SeqAIJ     *x,*y;
2139 
2140   PetscFunctionBegin;
2141   if (str == SAME_NONZERO_PATTERN) {
2142     PetscScalar alpha = a;
2143     x    = (Mat_SeqAIJ*)xx->A->data;
2144     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2145     y    = (Mat_SeqAIJ*)yy->A->data;
2146     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2147     x    = (Mat_SeqAIJ*)xx->B->data;
2148     y    = (Mat_SeqAIJ*)yy->B->data;
2149     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2150     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2151     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2152   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2153     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2154   } else {
2155     Mat      B;
2156     PetscInt *nnz_d,*nnz_o;
2157     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2158     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2159     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2160     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2161     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2162     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2163     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2164     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2165     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2166     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2167     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2168     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2169     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2170     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2176 
2177 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2178 {
2179 #if defined(PETSC_USE_COMPLEX)
2180   PetscErrorCode ierr;
2181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2185   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2186 #else
2187   PetscFunctionBegin;
2188 #endif
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2193 {
2194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2199   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2210   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2215 {
2216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2217   PetscErrorCode ierr;
2218   PetscInt       i,*idxb = 0;
2219   PetscScalar    *va,*vb;
2220   Vec            vtmp;
2221 
2222   PetscFunctionBegin;
2223   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2224   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2225   if (idx) {
2226     for (i=0; i<A->rmap->n; i++) {
2227       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2228     }
2229   }
2230 
2231   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2232   if (idx) {
2233     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2234   }
2235   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2236   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2237 
2238   for (i=0; i<A->rmap->n; i++) {
2239     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2240       va[i] = vb[i];
2241       if (idx) idx[i] = a->garray[idxb[i]];
2242     }
2243   }
2244 
2245   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2246   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2247   ierr = PetscFree(idxb);CHKERRQ(ierr);
2248   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2253 {
2254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2255   PetscErrorCode ierr;
2256   PetscInt       i,*idxb = 0;
2257   PetscScalar    *va,*vb;
2258   Vec            vtmp;
2259 
2260   PetscFunctionBegin;
2261   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2262   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2263   if (idx) {
2264     for (i=0; i<A->cmap->n; i++) {
2265       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2266     }
2267   }
2268 
2269   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2270   if (idx) {
2271     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2272   }
2273   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2274   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2275 
2276   for (i=0; i<A->rmap->n; i++) {
2277     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2278       va[i] = vb[i];
2279       if (idx) idx[i] = a->garray[idxb[i]];
2280     }
2281   }
2282 
2283   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2284   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2285   ierr = PetscFree(idxb);CHKERRQ(ierr);
2286   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2291 {
2292   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2293   PetscInt       n      = A->rmap->n;
2294   PetscInt       cstart = A->cmap->rstart;
2295   PetscInt       *cmap  = mat->garray;
2296   PetscInt       *diagIdx, *offdiagIdx;
2297   Vec            diagV, offdiagV;
2298   PetscScalar    *a, *diagA, *offdiagA;
2299   PetscInt       r;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2304   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2305   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2306   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2307   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2308   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2309   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2310   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2311   for (r = 0; r < n; ++r) {
2312     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2313       a[r]   = diagA[r];
2314       idx[r] = cstart + diagIdx[r];
2315     } else {
2316       a[r]   = offdiagA[r];
2317       idx[r] = cmap[offdiagIdx[r]];
2318     }
2319   }
2320   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2321   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2322   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2323   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2324   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2325   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2330 {
2331   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2332   PetscInt       n      = A->rmap->n;
2333   PetscInt       cstart = A->cmap->rstart;
2334   PetscInt       *cmap  = mat->garray;
2335   PetscInt       *diagIdx, *offdiagIdx;
2336   Vec            diagV, offdiagV;
2337   PetscScalar    *a, *diagA, *offdiagA;
2338   PetscInt       r;
2339   PetscErrorCode ierr;
2340 
2341   PetscFunctionBegin;
2342   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2343   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2344   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2345   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2346   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2347   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2348   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2349   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2350   for (r = 0; r < n; ++r) {
2351     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2352       a[r]   = diagA[r];
2353       idx[r] = cstart + diagIdx[r];
2354     } else {
2355       a[r]   = offdiagA[r];
2356       idx[r] = cmap[offdiagIdx[r]];
2357     }
2358   }
2359   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2360   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2361   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2362   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2363   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2364   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2369 {
2370   PetscErrorCode ierr;
2371   Mat            *dummy;
2372 
2373   PetscFunctionBegin;
2374   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2375   *newmat = *dummy;
2376   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2381 {
2382   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2387   A->factorerrortype = a->A->factorerrortype;
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2392 {
2393   PetscErrorCode ierr;
2394   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2398   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2399   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2400   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2405 {
2406   PetscFunctionBegin;
2407   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2408   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 /*@
2413    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2414 
2415    Collective on Mat
2416 
2417    Input Parameters:
2418 +    A - the matrix
2419 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2420 
2421  Level: advanced
2422 
2423 @*/
2424 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2425 {
2426   PetscErrorCode       ierr;
2427 
2428   PetscFunctionBegin;
2429   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2434 {
2435   PetscErrorCode       ierr;
2436   PetscBool            sc = PETSC_FALSE,flg;
2437 
2438   PetscFunctionBegin;
2439   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2440   ierr = PetscObjectOptionsBegin((PetscObject)A);
2441     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2442     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2443     if (flg) {
2444       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2445     }
2446   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2451 {
2452   PetscErrorCode ierr;
2453   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2454   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2455 
2456   PetscFunctionBegin;
2457   if (!Y->preallocated) {
2458     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2459   } else if (!aij->nz) {
2460     PetscInt nonew = aij->nonew;
2461     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2462     aij->nonew = nonew;
2463   }
2464   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2469 {
2470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2475   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2476   if (d) {
2477     PetscInt rstart;
2478     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2479     *d += rstart;
2480 
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 
2486 /* -------------------------------------------------------------------*/
2487 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2488                                        MatGetRow_MPIAIJ,
2489                                        MatRestoreRow_MPIAIJ,
2490                                        MatMult_MPIAIJ,
2491                                 /* 4*/ MatMultAdd_MPIAIJ,
2492                                        MatMultTranspose_MPIAIJ,
2493                                        MatMultTransposeAdd_MPIAIJ,
2494                                        0,
2495                                        0,
2496                                        0,
2497                                 /*10*/ 0,
2498                                        0,
2499                                        0,
2500                                        MatSOR_MPIAIJ,
2501                                        MatTranspose_MPIAIJ,
2502                                 /*15*/ MatGetInfo_MPIAIJ,
2503                                        MatEqual_MPIAIJ,
2504                                        MatGetDiagonal_MPIAIJ,
2505                                        MatDiagonalScale_MPIAIJ,
2506                                        MatNorm_MPIAIJ,
2507                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2508                                        MatAssemblyEnd_MPIAIJ,
2509                                        MatSetOption_MPIAIJ,
2510                                        MatZeroEntries_MPIAIJ,
2511                                 /*24*/ MatZeroRows_MPIAIJ,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                 /*29*/ MatSetUp_MPIAIJ,
2517                                        0,
2518                                        0,
2519                                        MatGetDiagonalBlock_MPIAIJ,
2520                                        0,
2521                                 /*34*/ MatDuplicate_MPIAIJ,
2522                                        0,
2523                                        0,
2524                                        0,
2525                                        0,
2526                                 /*39*/ MatAXPY_MPIAIJ,
2527                                        MatCreateSubMatrices_MPIAIJ,
2528                                        MatIncreaseOverlap_MPIAIJ,
2529                                        MatGetValues_MPIAIJ,
2530                                        MatCopy_MPIAIJ,
2531                                 /*44*/ MatGetRowMax_MPIAIJ,
2532                                        MatScale_MPIAIJ,
2533                                        MatShift_MPIAIJ,
2534                                        MatDiagonalSet_MPIAIJ,
2535                                        MatZeroRowsColumns_MPIAIJ,
2536                                 /*49*/ MatSetRandom_MPIAIJ,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                        0,
2541                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2542                                        0,
2543                                        MatSetUnfactored_MPIAIJ,
2544                                        MatPermute_MPIAIJ,
2545                                        0,
2546                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2547                                        MatDestroy_MPIAIJ,
2548                                        MatView_MPIAIJ,
2549                                        0,
2550                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2551                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2552                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2553                                        0,
2554                                        0,
2555                                        0,
2556                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2557                                        MatGetRowMinAbs_MPIAIJ,
2558                                        0,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                 /*75*/ MatFDColoringApply_AIJ,
2563                                        MatSetFromOptions_MPIAIJ,
2564                                        0,
2565                                        0,
2566                                        MatFindZeroDiagonals_MPIAIJ,
2567                                 /*80*/ 0,
2568                                        0,
2569                                        0,
2570                                 /*83*/ MatLoad_MPIAIJ,
2571                                        MatIsSymmetric_MPIAIJ,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                        0,
2576                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2577                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2578                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2579                                        MatPtAP_MPIAIJ_MPIAIJ,
2580                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2581                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                 /*99*/ 0,
2587                                        0,
2588                                        0,
2589                                        MatConjugate_MPIAIJ,
2590                                        0,
2591                                 /*104*/MatSetValuesRow_MPIAIJ,
2592                                        MatRealPart_MPIAIJ,
2593                                        MatImaginaryPart_MPIAIJ,
2594                                        0,
2595                                        0,
2596                                 /*109*/0,
2597                                        0,
2598                                        MatGetRowMin_MPIAIJ,
2599                                        0,
2600                                        MatMissingDiagonal_MPIAIJ,
2601                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2602                                        0,
2603                                        MatGetGhosts_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                 /*119*/0,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        MatGetMultiProcBlock_MPIAIJ,
2611                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2612                                        MatGetColumnNorms_MPIAIJ,
2613                                        MatInvertBlockDiagonal_MPIAIJ,
2614                                        0,
2615                                        MatCreateSubMatricesMPI_MPIAIJ,
2616                                 /*129*/0,
2617                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2618                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2619                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2620                                        0,
2621                                 /*134*/0,
2622                                        0,
2623                                        MatRARt_MPIAIJ_MPIAIJ,
2624                                        0,
2625                                        0,
2626                                 /*139*/MatSetBlockSizes_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        MatFDColoringSetUp_MPIXAIJ,
2630                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2631                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2632 };
2633 
2634 /* ----------------------------------------------------------------------------------------*/
2635 
2636 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2637 {
2638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2639   PetscErrorCode ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2643   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2648 {
2649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2650   PetscErrorCode ierr;
2651 
2652   PetscFunctionBegin;
2653   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2654   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2659 {
2660   Mat_MPIAIJ     *b;
2661   PetscErrorCode ierr;
2662 
2663   PetscFunctionBegin;
2664   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2665   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2666   b = (Mat_MPIAIJ*)B->data;
2667 
2668 #if defined(PETSC_USE_CTABLE)
2669   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2670 #else
2671   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2672 #endif
2673   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2674   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2675   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2676 
2677   /* Because the B will have been resized we simply destroy it and create a new one each time */
2678   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2679   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2680   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2681   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2682   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2683   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2684 
2685   if (!B->preallocated) {
2686     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2687     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2688     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2689     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2690     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2691   }
2692 
2693   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2694   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2695   B->preallocated  = PETSC_TRUE;
2696   B->was_assembled = PETSC_FALSE;
2697   B->assembled     = PETSC_FALSE;;
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2702 {
2703   Mat_MPIAIJ     *b;
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2708   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2709   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2710   b = (Mat_MPIAIJ*)B->data;
2711 
2712 #if defined(PETSC_USE_CTABLE)
2713   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2714 #else
2715   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2716 #endif
2717   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2718   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2719   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2720 
2721   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2722   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2723   B->preallocated  = PETSC_TRUE;
2724   B->was_assembled = PETSC_FALSE;
2725   B->assembled = PETSC_FALSE;
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2730 {
2731   Mat            mat;
2732   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2733   PetscErrorCode ierr;
2734 
2735   PetscFunctionBegin;
2736   *newmat = 0;
2737   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2738   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2739   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2740   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2741   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2742   a       = (Mat_MPIAIJ*)mat->data;
2743 
2744   mat->factortype   = matin->factortype;
2745   mat->assembled    = PETSC_TRUE;
2746   mat->insertmode   = NOT_SET_VALUES;
2747   mat->preallocated = PETSC_TRUE;
2748 
2749   a->size         = oldmat->size;
2750   a->rank         = oldmat->rank;
2751   a->donotstash   = oldmat->donotstash;
2752   a->roworiented  = oldmat->roworiented;
2753   a->rowindices   = 0;
2754   a->rowvalues    = 0;
2755   a->getrowactive = PETSC_FALSE;
2756 
2757   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2758   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2759 
2760   if (oldmat->colmap) {
2761 #if defined(PETSC_USE_CTABLE)
2762     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2763 #else
2764     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2765     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2766     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2767 #endif
2768   } else a->colmap = 0;
2769   if (oldmat->garray) {
2770     PetscInt len;
2771     len  = oldmat->B->cmap->n;
2772     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2773     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2774     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2775   } else a->garray = 0;
2776 
2777   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2778   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2779   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2780   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2781   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2782   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2783   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2784   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2785   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2786   *newmat = mat;
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2791 {
2792   PetscScalar    *vals,*svals;
2793   MPI_Comm       comm;
2794   PetscErrorCode ierr;
2795   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2796   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2797   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2798   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2799   PetscInt       cend,cstart,n,*rowners;
2800   int            fd;
2801   PetscInt       bs = newMat->rmap->bs;
2802 
2803   PetscFunctionBegin;
2804   /* force binary viewer to load .info file if it has not yet done so */
2805   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2806   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2807   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2808   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2809   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2810   if (!rank) {
2811     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2812     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2813     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2814   }
2815 
2816   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2817   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2818   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2819   if (bs < 0) bs = 1;
2820 
2821   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2822   M    = header[1]; N = header[2];
2823 
2824   /* If global sizes are set, check if they are consistent with that given in the file */
2825   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2826   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2827 
2828   /* determine ownership of all (block) rows */
2829   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2830   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2831   else m = newMat->rmap->n; /* Set by user */
2832 
2833   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2834   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2835 
2836   /* First process needs enough room for process with most rows */
2837   if (!rank) {
2838     mmax = rowners[1];
2839     for (i=2; i<=size; i++) {
2840       mmax = PetscMax(mmax, rowners[i]);
2841     }
2842   } else mmax = -1;             /* unused, but compilers complain */
2843 
2844   rowners[0] = 0;
2845   for (i=2; i<=size; i++) {
2846     rowners[i] += rowners[i-1];
2847   }
2848   rstart = rowners[rank];
2849   rend   = rowners[rank+1];
2850 
2851   /* distribute row lengths to all processors */
2852   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2853   if (!rank) {
2854     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2855     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2856     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2857     for (j=0; j<m; j++) {
2858       procsnz[0] += ourlens[j];
2859     }
2860     for (i=1; i<size; i++) {
2861       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2862       /* calculate the number of nonzeros on each processor */
2863       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2864         procsnz[i] += rowlengths[j];
2865       }
2866       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2867     }
2868     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2869   } else {
2870     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2871   }
2872 
2873   if (!rank) {
2874     /* determine max buffer needed and allocate it */
2875     maxnz = 0;
2876     for (i=0; i<size; i++) {
2877       maxnz = PetscMax(maxnz,procsnz[i]);
2878     }
2879     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2880 
2881     /* read in my part of the matrix column indices  */
2882     nz   = procsnz[0];
2883     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2884     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2885 
2886     /* read in every one elses and ship off */
2887     for (i=1; i<size; i++) {
2888       nz   = procsnz[i];
2889       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2890       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2891     }
2892     ierr = PetscFree(cols);CHKERRQ(ierr);
2893   } else {
2894     /* determine buffer space needed for message */
2895     nz = 0;
2896     for (i=0; i<m; i++) {
2897       nz += ourlens[i];
2898     }
2899     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2900 
2901     /* receive message of column indices*/
2902     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2903   }
2904 
2905   /* determine column ownership if matrix is not square */
2906   if (N != M) {
2907     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2908     else n = newMat->cmap->n;
2909     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2910     cstart = cend - n;
2911   } else {
2912     cstart = rstart;
2913     cend   = rend;
2914     n      = cend - cstart;
2915   }
2916 
2917   /* loop over local rows, determining number of off diagonal entries */
2918   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2919   jj   = 0;
2920   for (i=0; i<m; i++) {
2921     for (j=0; j<ourlens[i]; j++) {
2922       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2923       jj++;
2924     }
2925   }
2926 
2927   for (i=0; i<m; i++) {
2928     ourlens[i] -= offlens[i];
2929   }
2930   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2931 
2932   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2933 
2934   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2935 
2936   for (i=0; i<m; i++) {
2937     ourlens[i] += offlens[i];
2938   }
2939 
2940   if (!rank) {
2941     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2942 
2943     /* read in my part of the matrix numerical values  */
2944     nz   = procsnz[0];
2945     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2946 
2947     /* insert into matrix */
2948     jj      = rstart;
2949     smycols = mycols;
2950     svals   = vals;
2951     for (i=0; i<m; i++) {
2952       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2953       smycols += ourlens[i];
2954       svals   += ourlens[i];
2955       jj++;
2956     }
2957 
2958     /* read in other processors and ship out */
2959     for (i=1; i<size; i++) {
2960       nz   = procsnz[i];
2961       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2962       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2963     }
2964     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2965   } else {
2966     /* receive numeric values */
2967     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2968 
2969     /* receive message of values*/
2970     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2971 
2972     /* insert into matrix */
2973     jj      = rstart;
2974     smycols = mycols;
2975     svals   = vals;
2976     for (i=0; i<m; i++) {
2977       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2978       smycols += ourlens[i];
2979       svals   += ourlens[i];
2980       jj++;
2981     }
2982   }
2983   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2984   ierr = PetscFree(vals);CHKERRQ(ierr);
2985   ierr = PetscFree(mycols);CHKERRQ(ierr);
2986   ierr = PetscFree(rowners);CHKERRQ(ierr);
2987   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2988   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 /* Not scalable because of ISAllGather() unless getting all columns. */
2993 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2994 {
2995   PetscErrorCode ierr;
2996   IS             iscol_local;
2997   PetscBool      isstride;
2998   PetscMPIInt    lisstride=0,gisstride;
2999 
3000   PetscFunctionBegin;
3001   /* check if we are grabbing all columns*/
3002   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3003 
3004   if (isstride) {
3005     PetscInt  start,len,mstart,mlen;
3006     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3007     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3008     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3009     if (mstart == start && mlen-mstart == len) lisstride = 1;
3010   }
3011 
3012   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3013   if (gisstride) {
3014     PetscInt N;
3015     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3016     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3017     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3018     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3019   } else {
3020     PetscInt cbs;
3021     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3022     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3023     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3024   }
3025 
3026   *isseq = iscol_local;
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 /*
3031  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3032  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3033 
3034  Input Parameters:
3035    mat - matrix
3036    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3037            i.e., mat->rstart <= isrow[i] < mat->rend
3038    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3039            i.e., mat->cstart <= iscol[i] < mat->cend
3040  Output Parameter:
3041    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3042    iscol_o - sequential column index set for retrieving mat->B
3043    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3044  */
3045 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3046 {
3047   PetscErrorCode ierr;
3048   Vec            x,cmap;
3049   const PetscInt *is_idx;
3050   PetscScalar    *xarray,*cmaparray;
3051   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3052   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3053   Mat            B=a->B;
3054   Vec            lvec=a->lvec,lcmap;
3055   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3056   MPI_Comm       comm;
3057   VecScatter     Mvctx=a->Mvctx;
3058 
3059   PetscFunctionBegin;
3060   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3061   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3062 
3063   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3064   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3065   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3066   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3067   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3068 
3069   /* Get start indices */
3070   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3071   isstart -= ncols;
3072   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3073 
3074   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3075   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3076   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3077   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3078   for (i=0; i<ncols; i++) {
3079     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3080     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3081     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3082   }
3083   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3084   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3085   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3086 
3087   /* Get iscol_d */
3088   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3089   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3090   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3091 
3092   /* Get isrow_d */
3093   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3094   rstart = mat->rmap->rstart;
3095   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3096   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3097   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3098   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3099 
3100   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3101   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3102   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3103 
3104   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3105   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3106   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3107 
3108   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3109 
3110   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3111   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3112 
3113   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3114   /* off-process column indices */
3115   count = 0;
3116   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3117   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3118 
3119   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3120   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3121   for (i=0; i<Bn; i++) {
3122     if (PetscRealPart(xarray[i]) > -1.0) {
3123       idx[count]     = i;                   /* local column index in off-diagonal part B */
3124       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3125       count++;
3126     }
3127   }
3128   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3129   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3130 
3131   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3132   /* cannot ensure iscol_o has same blocksize as iscol! */
3133 
3134   ierr = PetscFree(idx);CHKERRQ(ierr);
3135   *garray = cmap1;
3136 
3137   ierr = VecDestroy(&x);CHKERRQ(ierr);
3138   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3139   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3140   PetscFunctionReturn(0);
3141 }
3142 
3143 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3144 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3145 {
3146   PetscErrorCode ierr;
3147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3148   Mat            M = NULL;
3149   MPI_Comm       comm;
3150   IS             iscol_d,isrow_d,iscol_o;
3151   Mat            Asub = NULL,Bsub = NULL;
3152   PetscInt       n;
3153 
3154   PetscFunctionBegin;
3155   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3156 
3157   if (call == MAT_REUSE_MATRIX) {
3158     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3159     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3160     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3161 
3162     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3163     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3164 
3165     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3166     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3167 
3168     /* Update diagonal and off-diagonal portions of submat */
3169     asub = (Mat_MPIAIJ*)(*submat)->data;
3170     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3171     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3172     if (n) {
3173       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3174     }
3175     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3176     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3177 
3178   } else { /* call == MAT_INITIAL_MATRIX) */
3179     const PetscInt *garray;
3180     PetscInt        BsubN;
3181 
3182     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3183     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3184 
3185     /* Create local submatrices Asub and Bsub */
3186     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3187     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3188 
3189     /* Create submatrix M */
3190     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3191 
3192     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3193     asub = (Mat_MPIAIJ*)M->data;
3194 
3195     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3196     n = asub->B->cmap->N;
3197     if (BsubN > n) {
3198       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3199       const PetscInt *idx;
3200       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3201       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3202 
3203       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3204       j = 0;
3205       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3206       for (i=0; i<n; i++) {
3207         if (j >= BsubN) break;
3208         while (subgarray[i] > garray[j]) j++;
3209 
3210         if (subgarray[i] == garray[j]) {
3211           idx_new[i] = idx[j++];
3212         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3213       }
3214       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3215 
3216       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3217       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3218 
3219     } else if (BsubN < n) {
3220       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3221     }
3222 
3223     ierr = PetscFree(garray);CHKERRQ(ierr);
3224     *submat = M;
3225 
3226     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3227     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3228     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3229 
3230     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3231     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3232 
3233     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3234     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3235   }
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3240 {
3241   PetscErrorCode ierr;
3242   IS             iscol_local=NULL,isrow_d;
3243   PetscInt       csize;
3244   PetscInt       n,i,j,start,end;
3245   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3246   MPI_Comm       comm;
3247 
3248   PetscFunctionBegin;
3249   /* If isrow has same processor distribution as mat,
3250      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3251   if (call == MAT_REUSE_MATRIX) {
3252     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3253     if (isrow_d) {
3254       sameRowDist  = PETSC_TRUE;
3255       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3256     } else {
3257       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3258       if (iscol_local) {
3259         sameRowDist  = PETSC_TRUE;
3260         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3261       }
3262     }
3263   } else {
3264     /* Check if isrow has same processor distribution as mat */
3265     sameDist[0] = PETSC_FALSE;
3266     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3267     if (!n) {
3268       sameDist[0] = PETSC_TRUE;
3269     } else {
3270       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3271       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3272       if (i >= start && j < end) {
3273         sameDist[0] = PETSC_TRUE;
3274       }
3275     }
3276 
3277     /* Check if iscol has same processor distribution as mat */
3278     sameDist[1] = PETSC_FALSE;
3279     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3280     if (!n) {
3281       sameDist[1] = PETSC_TRUE;
3282     } else {
3283       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3284       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3285       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3286     }
3287 
3288     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3289     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3290     sameRowDist = tsameDist[0];
3291   }
3292 
3293   if (sameRowDist) {
3294     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3295       /* isrow and iscol have same processor distribution as mat */
3296       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3297       PetscFunctionReturn(0);
3298     } else { /* sameRowDist */
3299       /* isrow has same processor distribution as mat */
3300       if (call == MAT_INITIAL_MATRIX) {
3301         PetscBool sorted;
3302         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3303         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3304         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3305         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3306 
3307         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3308         if (sorted) {
3309           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3310           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3311           PetscFunctionReturn(0);
3312         }
3313       } else { /* call == MAT_REUSE_MATRIX */
3314         IS    iscol_sub;
3315         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3316         if (iscol_sub) {
3317           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3318           PetscFunctionReturn(0);
3319         }
3320       }
3321     }
3322   }
3323 
3324   /* General case: iscol -> iscol_local which has global size of iscol */
3325   if (call == MAT_REUSE_MATRIX) {
3326     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3327     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3328   } else {
3329     if (!iscol_local) {
3330       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3331     }
3332   }
3333 
3334   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3335   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3336 
3337   if (call == MAT_INITIAL_MATRIX) {
3338     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3339     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3340   }
3341   PetscFunctionReturn(0);
3342 }
3343 
3344 /*@C
3345      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3346          and "off-diagonal" part of the matrix in CSR format.
3347 
3348    Collective on MPI_Comm
3349 
3350    Input Parameters:
3351 +  comm - MPI communicator
3352 .  A - "diagonal" portion of matrix
3353 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3354 -  garray - global index of B columns
3355 
3356    Output Parameter:
3357 .   mat - the matrix, with input A as its local diagonal matrix
3358    Level: advanced
3359 
3360    Notes:
3361        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3362        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3363 
3364 .seealso: MatCreateMPIAIJWithSplitArrays()
3365 @*/
3366 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3367 {
3368   PetscErrorCode ierr;
3369   Mat_MPIAIJ     *maij;
3370   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3371   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3372   PetscScalar    *oa=b->a;
3373   Mat            Bnew;
3374   PetscInt       m,n,N;
3375 
3376   PetscFunctionBegin;
3377   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3378   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3379   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3380   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3381   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3382   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3383 
3384   /* Get global columns of mat */
3385   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3386 
3387   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3388   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3389   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3390   maij = (Mat_MPIAIJ*)(*mat)->data;
3391 
3392   (*mat)->preallocated = PETSC_TRUE;
3393 
3394   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3395   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3396 
3397   /* Set A as diagonal portion of *mat */
3398   maij->A = A;
3399 
3400   nz = oi[m];
3401   for (i=0; i<nz; i++) {
3402     col   = oj[i];
3403     oj[i] = garray[col];
3404   }
3405 
3406    /* Set Bnew as off-diagonal portion of *mat */
3407   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3408   bnew        = (Mat_SeqAIJ*)Bnew->data;
3409   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3410   maij->B     = Bnew;
3411 
3412   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3413 
3414   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3415   b->free_a       = PETSC_FALSE;
3416   b->free_ij      = PETSC_FALSE;
3417   ierr = MatDestroy(&B);CHKERRQ(ierr);
3418 
3419   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3420   bnew->free_a       = PETSC_TRUE;
3421   bnew->free_ij      = PETSC_TRUE;
3422 
3423   /* condense columns of maij->B */
3424   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3425   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3426   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3427   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3428   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3429   PetscFunctionReturn(0);
3430 }
3431 
3432 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3433 
3434 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3435 {
3436   PetscErrorCode ierr;
3437   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3438   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3439   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3440   Mat            M,Msub,B=a->B;
3441   MatScalar      *aa;
3442   Mat_SeqAIJ     *aij;
3443   PetscInt       *garray = a->garray,*colsub,Ncols;
3444   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3445   IS             iscol_sub,iscmap;
3446   const PetscInt *is_idx,*cmap;
3447   PetscBool      allcolumns=PETSC_FALSE;
3448   MPI_Comm       comm;
3449 
3450   PetscFunctionBegin;
3451   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3452 
3453   if (call == MAT_REUSE_MATRIX) {
3454     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3455     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3456     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3457 
3458     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3459     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3460 
3461     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3462     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3463 
3464     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3465 
3466   } else { /* call == MAT_INITIAL_MATRIX) */
3467     PetscBool flg;
3468 
3469     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3470     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3471 
3472     /* (1) iscol -> nonscalable iscol_local */
3473     /* Check for special case: each processor gets entire matrix columns */
3474     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3475     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3476     if (allcolumns) {
3477       iscol_sub = iscol_local;
3478       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3479       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3480 
3481     } else {
3482       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3483       PetscInt *idx,*cmap1,k;
3484       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3485       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3486       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3487       count = 0;
3488       k     = 0;
3489       for (i=0; i<Ncols; i++) {
3490         j = is_idx[i];
3491         if (j >= cstart && j < cend) {
3492           /* diagonal part of mat */
3493           idx[count]     = j;
3494           cmap1[count++] = i; /* column index in submat */
3495         } else if (Bn) {
3496           /* off-diagonal part of mat */
3497           if (j == garray[k]) {
3498             idx[count]     = j;
3499             cmap1[count++] = i;  /* column index in submat */
3500           } else if (j > garray[k]) {
3501             while (j > garray[k] && k < Bn-1) k++;
3502             if (j == garray[k]) {
3503               idx[count]     = j;
3504               cmap1[count++] = i; /* column index in submat */
3505             }
3506           }
3507         }
3508       }
3509       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3510 
3511       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3512       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3513       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3514 
3515       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3516     }
3517 
3518     /* (3) Create sequential Msub */
3519     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3520   }
3521 
3522   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3523   aij  = (Mat_SeqAIJ*)(Msub)->data;
3524   ii   = aij->i;
3525   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3526 
3527   /*
3528       m - number of local rows
3529       Ncols - number of columns (same on all processors)
3530       rstart - first row in new global matrix generated
3531   */
3532   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3533 
3534   if (call == MAT_INITIAL_MATRIX) {
3535     /* (4) Create parallel newmat */
3536     PetscMPIInt    rank,size;
3537     PetscInt       csize;
3538 
3539     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3540     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3541 
3542     /*
3543         Determine the number of non-zeros in the diagonal and off-diagonal
3544         portions of the matrix in order to do correct preallocation
3545     */
3546 
3547     /* first get start and end of "diagonal" columns */
3548     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3549     if (csize == PETSC_DECIDE) {
3550       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3551       if (mglobal == Ncols) { /* square matrix */
3552         nlocal = m;
3553       } else {
3554         nlocal = Ncols/size + ((Ncols % size) > rank);
3555       }
3556     } else {
3557       nlocal = csize;
3558     }
3559     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3560     rstart = rend - nlocal;
3561     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3562 
3563     /* next, compute all the lengths */
3564     jj    = aij->j;
3565     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3566     olens = dlens + m;
3567     for (i=0; i<m; i++) {
3568       jend = ii[i+1] - ii[i];
3569       olen = 0;
3570       dlen = 0;
3571       for (j=0; j<jend; j++) {
3572         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3573         else dlen++;
3574         jj++;
3575       }
3576       olens[i] = olen;
3577       dlens[i] = dlen;
3578     }
3579 
3580     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3581     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3582 
3583     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3584     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3585     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3586     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3587     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3588     ierr = PetscFree(dlens);CHKERRQ(ierr);
3589 
3590   } else { /* call == MAT_REUSE_MATRIX */
3591     M    = *newmat;
3592     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3593     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3594     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3595     /*
3596          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3597        rather than the slower MatSetValues().
3598     */
3599     M->was_assembled = PETSC_TRUE;
3600     M->assembled     = PETSC_FALSE;
3601   }
3602 
3603   /* (5) Set values of Msub to *newmat */
3604   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3605   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3606 
3607   jj   = aij->j;
3608   aa   = aij->a;
3609   for (i=0; i<m; i++) {
3610     row = rstart + i;
3611     nz  = ii[i+1] - ii[i];
3612     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3613     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3614     jj += nz; aa += nz;
3615   }
3616   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3617 
3618   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3620 
3621   ierr = PetscFree(colsub);CHKERRQ(ierr);
3622 
3623   /* save Msub, iscol_sub and iscmap used in processor for next request */
3624   if (call ==  MAT_INITIAL_MATRIX) {
3625     *newmat = M;
3626     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3627     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3628 
3629     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3630     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3631 
3632     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3633     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3634 
3635     if (iscol_local) {
3636       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3637       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3638     }
3639   }
3640   PetscFunctionReturn(0);
3641 }
3642 
3643 /*
3644     Not great since it makes two copies of the submatrix, first an SeqAIJ
3645   in local and then by concatenating the local matrices the end result.
3646   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3647 
3648   Note: This requires a sequential iscol with all indices.
3649 */
3650 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3651 {
3652   PetscErrorCode ierr;
3653   PetscMPIInt    rank,size;
3654   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3655   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3656   Mat            M,Mreuse;
3657   MatScalar      *aa,*vwork;
3658   MPI_Comm       comm;
3659   Mat_SeqAIJ     *aij;
3660   PetscBool      colflag,allcolumns=PETSC_FALSE;
3661 
3662   PetscFunctionBegin;
3663   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3664   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3665   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3666 
3667   /* Check for special case: each processor gets entire matrix columns */
3668   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3669   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3670   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3671 
3672   if (call ==  MAT_REUSE_MATRIX) {
3673     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3674     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3675     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3676   } else {
3677     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3678   }
3679 
3680   /*
3681       m - number of local rows
3682       n - number of columns (same on all processors)
3683       rstart - first row in new global matrix generated
3684   */
3685   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3686   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3687   if (call == MAT_INITIAL_MATRIX) {
3688     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3689     ii  = aij->i;
3690     jj  = aij->j;
3691 
3692     /*
3693         Determine the number of non-zeros in the diagonal and off-diagonal
3694         portions of the matrix in order to do correct preallocation
3695     */
3696 
3697     /* first get start and end of "diagonal" columns */
3698     if (csize == PETSC_DECIDE) {
3699       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3700       if (mglobal == n) { /* square matrix */
3701         nlocal = m;
3702       } else {
3703         nlocal = n/size + ((n % size) > rank);
3704       }
3705     } else {
3706       nlocal = csize;
3707     }
3708     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3709     rstart = rend - nlocal;
3710     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3711 
3712     /* next, compute all the lengths */
3713     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3714     olens = dlens + m;
3715     for (i=0; i<m; i++) {
3716       jend = ii[i+1] - ii[i];
3717       olen = 0;
3718       dlen = 0;
3719       for (j=0; j<jend; j++) {
3720         if (*jj < rstart || *jj >= rend) olen++;
3721         else dlen++;
3722         jj++;
3723       }
3724       olens[i] = olen;
3725       dlens[i] = dlen;
3726     }
3727     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3728     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3729     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3730     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3731     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3732     ierr = PetscFree(dlens);CHKERRQ(ierr);
3733   } else {
3734     PetscInt ml,nl;
3735 
3736     M    = *newmat;
3737     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3738     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3739     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3740     /*
3741          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3742        rather than the slower MatSetValues().
3743     */
3744     M->was_assembled = PETSC_TRUE;
3745     M->assembled     = PETSC_FALSE;
3746   }
3747   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3748   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3749   ii   = aij->i;
3750   jj   = aij->j;
3751   aa   = aij->a;
3752   for (i=0; i<m; i++) {
3753     row   = rstart + i;
3754     nz    = ii[i+1] - ii[i];
3755     cwork = jj;     jj += nz;
3756     vwork = aa;     aa += nz;
3757     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3758   }
3759 
3760   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762   *newmat = M;
3763 
3764   /* save submatrix used in processor for next request */
3765   if (call ==  MAT_INITIAL_MATRIX) {
3766     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3767     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3768   }
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3773 {
3774   PetscInt       m,cstart, cend,j,nnz,i,d;
3775   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3776   const PetscInt *JJ;
3777   PetscScalar    *values;
3778   PetscErrorCode ierr;
3779   PetscBool      nooffprocentries;
3780 
3781   PetscFunctionBegin;
3782   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3783 
3784   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3785   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3786   m      = B->rmap->n;
3787   cstart = B->cmap->rstart;
3788   cend   = B->cmap->rend;
3789   rstart = B->rmap->rstart;
3790 
3791   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3792 
3793 #if defined(PETSC_USE_DEBUGGING)
3794   for (i=0; i<m; i++) {
3795     nnz = Ii[i+1]- Ii[i];
3796     JJ  = J + Ii[i];
3797     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3798     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3799     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3800   }
3801 #endif
3802 
3803   for (i=0; i<m; i++) {
3804     nnz     = Ii[i+1]- Ii[i];
3805     JJ      = J + Ii[i];
3806     nnz_max = PetscMax(nnz_max,nnz);
3807     d       = 0;
3808     for (j=0; j<nnz; j++) {
3809       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3810     }
3811     d_nnz[i] = d;
3812     o_nnz[i] = nnz - d;
3813   }
3814   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3815   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3816 
3817   if (v) values = (PetscScalar*)v;
3818   else {
3819     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3820   }
3821 
3822   for (i=0; i<m; i++) {
3823     ii   = i + rstart;
3824     nnz  = Ii[i+1]- Ii[i];
3825     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3826   }
3827   nooffprocentries    = B->nooffprocentries;
3828   B->nooffprocentries = PETSC_TRUE;
3829   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   B->nooffprocentries = nooffprocentries;
3832 
3833   if (!v) {
3834     ierr = PetscFree(values);CHKERRQ(ierr);
3835   }
3836   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 /*@
3841    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3842    (the default parallel PETSc format).
3843 
3844    Collective on MPI_Comm
3845 
3846    Input Parameters:
3847 +  B - the matrix
3848 .  i - the indices into j for the start of each local row (starts with zero)
3849 .  j - the column indices for each local row (starts with zero)
3850 -  v - optional values in the matrix
3851 
3852    Level: developer
3853 
3854    Notes:
3855        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3856      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3857      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3858 
3859        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3860 
3861        The format which is used for the sparse matrix input is equivalent to a
3862     row-major ordering, i.e., for the following matrix, the expected input data is
3863     as shown
3864 
3865 $        1 0 0
3866 $        2 0 3     P0
3867 $       -------
3868 $        4 5 6     P1
3869 $
3870 $     Process0 [P0]: rows_owned=[0,1]
3871 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3872 $        j =  {0,0,2}  [size = 3]
3873 $        v =  {1,2,3}  [size = 3]
3874 $
3875 $     Process1 [P1]: rows_owned=[2]
3876 $        i =  {0,3}    [size = nrow+1  = 1+1]
3877 $        j =  {0,1,2}  [size = 3]
3878 $        v =  {4,5,6}  [size = 3]
3879 
3880 .keywords: matrix, aij, compressed row, sparse, parallel
3881 
3882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3883           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3884 @*/
3885 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3886 {
3887   PetscErrorCode ierr;
3888 
3889   PetscFunctionBegin;
3890   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3891   PetscFunctionReturn(0);
3892 }
3893 
3894 /*@C
3895    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3896    (the default parallel PETSc format).  For good matrix assembly performance
3897    the user should preallocate the matrix storage by setting the parameters
3898    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3899    performance can be increased by more than a factor of 50.
3900 
3901    Collective on MPI_Comm
3902 
3903    Input Parameters:
3904 +  B - the matrix
3905 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3906            (same value is used for all local rows)
3907 .  d_nnz - array containing the number of nonzeros in the various rows of the
3908            DIAGONAL portion of the local submatrix (possibly different for each row)
3909            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3910            The size of this array is equal to the number of local rows, i.e 'm'.
3911            For matrices that will be factored, you must leave room for (and set)
3912            the diagonal entry even if it is zero.
3913 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3914            submatrix (same value is used for all local rows).
3915 -  o_nnz - array containing the number of nonzeros in the various rows of the
3916            OFF-DIAGONAL portion of the local submatrix (possibly different for
3917            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3918            structure. The size of this array is equal to the number
3919            of local rows, i.e 'm'.
3920 
3921    If the *_nnz parameter is given then the *_nz parameter is ignored
3922 
3923    The AIJ format (also called the Yale sparse matrix format or
3924    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3925    storage.  The stored row and column indices begin with zero.
3926    See Users-Manual: ch_mat for details.
3927 
3928    The parallel matrix is partitioned such that the first m0 rows belong to
3929    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3930    to process 2, etc., where m0,m1,m2,... are the values of 'm' (the number of local rows) on the respective processes.
3931 
3932    The DIAGONAL portion of the local submatrix of a processor can be defined
3933    as the submatrix which is obtained by extracting the part corresponding to
3934    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3935    first row that belongs to the processor, r2 is the last row belonging to
3936    this processor, and c1-c2 is the range of indices of the local part of a
3937    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3938    common case of a square matrix, the row and column ranges are the same and
3939    the DIAGONAL part is also square. The remaining portion of the local
3940    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3941 
3942    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3943 
3944    You can call MatGetInfo() to get information on how effective the preallocation was;
3945    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3946    You can also run with the option -info and look for messages with the string
3947    malloc in them to see if additional memory allocation was needed.
3948 
3949    Example usage:
3950 
3951    Consider the following 8x8 matrix with 34 non-zero values, that is
3952    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3953    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3954    as follows:
3955 
3956 .vb
3957             1  2  0  |  0  3  0  |  0  4
3958     Proc0   0  5  6  |  7  0  0  |  8  0
3959             9  0 10  | 11  0  0  | 12  0
3960     -------------------------------------
3961            13  0 14  | 15 16 17  |  0  0
3962     Proc1   0 18  0  | 19 20 21  |  0  0
3963             0  0  0  | 22 23  0  | 24  0
3964     -------------------------------------
3965     Proc2  25 26 27  |  0  0 28  | 29  0
3966            30  0  0  | 31 32 33  |  0 34
3967 .ve
3968 
3969    This can be represented as a collection of submatrices as:
3970 
3971 .vb
3972       A B C
3973       D E F
3974       G H I
3975 .ve
3976 
3977    Where the submatrices A,B,C are owned by proc0, D,E,F are
3978    owned by proc1, G,H,I are owned by proc2.
3979 
3980    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3981    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3982    The 'M','N' parameters are 8,8, and have the same values on all procs.
3983 
3984    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3985    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3986    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3987    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3988    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3989    matrix, and [DF] as another SeqAIJ matrix.
3990 
3991    When d_nz, o_nz parameters are specified, d_nz storage elements are
3992    allocated for every row of the local diagonal submatrix, and o_nz
3993    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3994    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3995    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3996    In this case, the values of d_nz,o_nz are:
3997 .vb
3998      proc0 : dnz = 2, o_nz = 2
3999      proc1 : dnz = 3, o_nz = 2
4000      proc2 : dnz = 1, o_nz = 4
4001 .ve
4002    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4003    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4004    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4005    34 values.
4006 
4007    When d_nnz, o_nnz parameters are specified, the storage is specified
4008    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4009    In the above case the values for d_nnz,o_nnz are:
4010 .vb
4011      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4012      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4013      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4014 .ve
4015    Here the space allocated is sum of all the above values i.e 34, and
4016    hence pre-allocation is perfect.
4017 
4018    Level: intermediate
4019 
4020 .keywords: matrix, aij, compressed row, sparse, parallel
4021 
4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4023           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4024 @*/
4025 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4026 {
4027   PetscErrorCode ierr;
4028 
4029   PetscFunctionBegin;
4030   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4031   PetscValidType(B,1);
4032   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4033   PetscFunctionReturn(0);
4034 }
4035 
4036 /*@
4037      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4038          CSR format the local rows.
4039 
4040    Collective on MPI_Comm
4041 
4042    Input Parameters:
4043 +  comm - MPI communicator
4044 .  m - number of local rows (Cannot be PETSC_DECIDE)
4045 .  n - This value should be the same as the local size used in creating the
4046        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4047        calculated if N is given) For square matrices n is almost always m.
4048 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4049 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
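4050 .   i - the indices into j for the start of each local row (starts with zero)
4051 .   j - the column indices for each local row (starts with zero)
4052 -   a - matrix values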
4053 
4054    Output Parameter:
4055 .   mat - the matrix
4056 
4057    Level: intermediate
4058 
4059    Notes:
4060        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4061      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4062      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4063 
4064        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4065 
4066        The format which is used for the sparse matrix input is equivalent to a
4067     row-major ordering, i.e., for the following matrix, the expected input data is
4068     as shown
4069 
4070 $        1 0 0
4071 $        2 0 3     P0
4072 $       -------
4073 $        4 5 6     P1
4074 $
4075 $     Process0 [P0]: rows_owned=[0,1]
4076 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4077 $        j =  {0,0,2}  [size = 3]
4078 $        a =  {1,2,3}  [size = 3]
4079 $
4080 $     Process1 [P1]: rows_owned=[2]
4081 $        i =  {0,3}    [size = nrow+1  = 1+1]
4082 $        j =  {0,1,2}  [size = 3]
4083 $        a =  {4,5,6}  [size = 3]
4084 
4085 .keywords: matrix, aij, compressed row, sparse, parallel
4086 
4087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4088           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4089 @*/
4090 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4091 {
4092   PetscErrorCode ierr;
4093 
4094   PetscFunctionBegin;
4095   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4096   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4097   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4098   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4099   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4100   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4101   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4102   PetscFunctionReturn(0);
4103 }
4104 
4105 /*@C
4106    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4107    (the default parallel PETSc format).  For good matrix assembly performance
4108    the user should preallocate the matrix storage by setting the parameters
4109    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4110    performance can be increased by more than a factor of 50.
4111 
4112    Collective on MPI_Comm
4113 
4114    Input Parameters:
4115 +  comm - MPI communicator
4116 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4117            This value should be the same as the local size used in creating the
4118            y vector for the matrix-vector product y = Ax.
4119 .  n - This value should be the same as the local size used in creating the
4120        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4121        calculated if N is given) For square matrices n is almost always m.
4122 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4123 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4124 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4125            (same value is used for all local rows)
4126 .  d_nnz - array containing the number of nonzeros in the various rows of the
4127            DIAGONAL portion of the local submatrix (possibly different for each row)
4128            or NULL, if d_nz is used to specify the nonzero structure.
4129            The size of this array is equal to the number of local rows, i.e 'm'.
4130 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4131            submatrix (same value is used for all local rows).
4132 -  o_nnz - array containing the number of nonzeros in the various rows of the
4133            OFF-DIAGONAL portion of the local submatrix (possibly different for
4134            each row) or NULL, if o_nz is used to specify the nonzero
4135            structure. The size of this array is equal to the number
4136            of local rows, i.e 'm'.
4137 
4138    Output Parameter:
4139 .  A - the matrix
4140 
4141    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4142    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4143    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4144 
4145    Notes:
4146    If the *_nnz parameter is given then the *_nz parameter is ignored
4147 
4148    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4149    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4150    storage requirements for this matrix.
4151 
4152    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4153    processor then it must be used on all processors that share the object for
4154    that argument.
4155 
4156    The user MUST specify either the local or global matrix dimensions
4157    (possibly both).
4158 
4159    The parallel matrix is partitioned across processors such that the
4160    first m0 rows belong to process 0, the next m1 rows belong to
4161    process 1, the next m2 rows belong to process 2 etc.. where
4162    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4163    values corresponding to [m x N] submatrix.
4164 
4165    The columns are logically partitioned with the n0 columns belonging
4166    to 0th partition, the next n1 columns belonging to the next
4167    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4168 
4169    The DIAGONAL portion of the local submatrix on any given processor
4170    is the submatrix corresponding to the rows and columns m,n
4171    corresponding to the given processor. i.e diagonal matrix on
4172    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4173    etc. The remaining portion of the local submatrix [m x (N-n)]
4174    constitute the OFF-DIAGONAL portion. The example below better
4175    illustrates this concept.
4176 
4177    For a square global matrix we define each processor's diagonal portion
4178    to be its local rows and the corresponding columns (a square submatrix);
4179    each processor's off-diagonal portion encompasses the remainder of the
4180    local matrix (a rectangular submatrix).
4181 
4182    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4183 
4184    When calling this routine with a single process communicator, a matrix of
4185    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4186    type of communicator, use the construction mechanism
4187 .vb
4188      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4189 .ve
4190 
4191 $     MatCreate(...,&A);
4192 $     MatSetType(A,MATMPIAIJ);
4193 $     MatSetSizes(A, m,n,M,N);
4194 $     MatMPIAIJSetPreallocation(A,...);
4195 
4196    By default, this format uses inodes (identical nodes) when possible.
4197    We search for consecutive rows with the same nonzero structure, thereby
4198    reusing matrix information to achieve increased efficiency.
4199 
4200    Options Database Keys:
4201 +  -mat_no_inode  - Do not use inodes
4202 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4203 -  -mat_aij_oneindex - Internally use indexing starting at 1
4204         rather than 0.  Note that when calling MatSetValues(),
4205         the user still MUST index entries starting at 0!
4206 
4207 
4208    Example usage:
4209 
4210    Consider the following 8x8 matrix with 34 non-zero values, that is
4211    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4212    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4213    as follows
4214 
4215 .vb
4216             1  2  0  |  0  3  0  |  0  4
4217     Proc0   0  5  6  |  7  0  0  |  8  0
4218             9  0 10  | 11  0  0  | 12  0
4219     -------------------------------------
4220            13  0 14  | 15 16 17  |  0  0
4221     Proc1   0 18  0  | 19 20 21  |  0  0
4222             0  0  0  | 22 23  0  | 24  0
4223     -------------------------------------
4224     Proc2  25 26 27  |  0  0 28  | 29  0
4225            30  0  0  | 31 32 33  |  0 34
4226 .ve
4227 
4228    This can be represented as a collection of submatrices as
4229 
4230 .vb
4231       A B C
4232       D E F
4233       G H I
4234 .ve
4235 
4236    Where the submatrices A,B,C are owned by proc0, D,E,F are
4237    owned by proc1, G,H,I are owned by proc2.
4238 
4239    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4240    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4241    The 'M','N' parameters are 8,8, and have the same values on all procs.
4242 
4243    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4244    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4245    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4246    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4247    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4248    matrix, and [DF] as another SeqAIJ matrix.
4249 
4250    When d_nz, o_nz parameters are specified, d_nz storage elements are
4251    allocated for every row of the local diagonal submatrix, and o_nz
4252    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4253    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4254    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4255    In this case, the values of d_nz,o_nz are
4256 .vb
4257      proc0 : d_nz = 2, o_nz = 2
4258      proc1 : d_nz = 3, o_nz = 2
4259      proc2 : d_nz = 1, o_nz = 4
4260 .ve
4261    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4262    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4263    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4264    34 values.
4265 
4266    When d_nnz, o_nnz parameters are specified, the storage is specified
4267    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4268    In the above case the values for d_nnz,o_nnz are
4269 .vb
4270      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4271      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4272      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4273 .ve
4274    Here the space allocated is sum of all the above values i.e 34, and
4275    hence pre-allocation is perfect.
4276 
4277    Level: intermediate
4278 
4279 .keywords: matrix, aij, compressed row, sparse, parallel
4280 
4281 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4282           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4283 @*/
4284 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4285 {
4286   PetscErrorCode ierr;
4287   PetscMPIInt    size;
4288 
4289   PetscFunctionBegin;
4290   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4291   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4293   if (size > 1) {
4294     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4295     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4296   } else {
4297     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4298     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4299   }
4300   PetscFunctionReturn(0);
4301 }
4302 
4303 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4304 {
4305   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4306   PetscBool      flg;
4307   PetscErrorCode ierr;
4308 
4309   PetscFunctionBegin;
4310   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4311   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4312   if (Ad)     *Ad     = a->A;
4313   if (Ao)     *Ao     = a->B;
4314   if (colmap) *colmap = a->garray;
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4319 {
4320   PetscErrorCode ierr;
4321   PetscInt       m,N,i,rstart,nnz,Ii;
4322   PetscInt       *indx;
4323   PetscScalar    *values;
4324 
4325   PetscFunctionBegin;
4326   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4327   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4328     PetscInt       *dnz,*onz,sum,bs,cbs;
4329 
4330     if (n == PETSC_DECIDE) {
4331       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4332     }
4333     /* Check sum(n) = N */
4334     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4335     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4336 
4337     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4338     rstart -= m;
4339 
4340     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4341     for (i=0; i<m; i++) {
4342       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4343       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4344       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4345     }
4346 
4347     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4348     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4349     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4350     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4351     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4352     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4353     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4354     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4355   }
4356 
4357   /* numeric phase */
4358   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4359   for (i=0; i<m; i++) {
4360     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4361     Ii   = i + rstart;
4362     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4363     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4364   }
4365   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4366   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4367   PetscFunctionReturn(0);
4368 }
4369 
4370 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4371 {
4372   PetscErrorCode    ierr;
4373   PetscMPIInt       rank;
4374   PetscInt          m,N,i,rstart,nnz;
4375   size_t            len;
4376   const PetscInt    *indx;
4377   PetscViewer       out;
4378   char              *name;
4379   Mat               B;
4380   const PetscScalar *values;
4381 
4382   PetscFunctionBegin;
4383   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4384   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4385   /* Should this be the type of the diagonal block of A? */
4386   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4387   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4388   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4389   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4390   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4391   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4392   for (i=0; i<m; i++) {
4393     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4394     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4395     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4396   }
4397   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4398   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4399 
4400   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4401   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4402   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4403   sprintf(name,"%s.%d",outfile,rank);
4404   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4405   ierr = PetscFree(name);CHKERRQ(ierr);
4406   ierr = MatView(B,out);CHKERRQ(ierr);
4407   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4408   ierr = MatDestroy(&B);CHKERRQ(ierr);
4409   PetscFunctionReturn(0);
4410 }
4411 
4412 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4413 {
4414   PetscErrorCode      ierr;
4415   Mat_Merge_SeqsToMPI *merge;
4416   PetscContainer      container;
4417 
4418   PetscFunctionBegin;
4419   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4420   if (container) {
4421     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4434     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4435     ierr = PetscFree(merge);CHKERRQ(ierr);
4436     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4437   }
4438   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4439   PetscFunctionReturn(0);
4440 }
4441 
4442 #include <../src/mat/utils/freespace.h>
4443 #include <petscbt.h>
4444 
4445 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4446 {
4447   PetscErrorCode      ierr;
4448   MPI_Comm            comm;
4449   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4450   PetscMPIInt         size,rank,taga,*len_s;
4451   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4452   PetscInt            proc,m;
4453   PetscInt            **buf_ri,**buf_rj;
4454   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4455   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4456   MPI_Request         *s_waits,*r_waits;
4457   MPI_Status          *status;
4458   MatScalar           *aa=a->a;
4459   MatScalar           **abuf_r,*ba_i;
4460   Mat_Merge_SeqsToMPI *merge;
4461   PetscContainer      container;
4462 
4463   PetscFunctionBegin;
4464   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4465   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4466 
4467   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4468   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4469 
4470   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4471   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4472 
4473   bi     = merge->bi;
4474   bj     = merge->bj;
4475   buf_ri = merge->buf_ri;
4476   buf_rj = merge->buf_rj;
4477 
4478   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4479   owners = merge->rowmap->range;
4480   len_s  = merge->len_s;
4481 
4482   /* send and recv matrix values */
4483   /*-----------------------------*/
4484   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4485   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4486 
4487   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4488   for (proc=0,k=0; proc<size; proc++) {
4489     if (!len_s[proc]) continue;
4490     i    = owners[proc];
4491     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4492     k++;
4493   }
4494 
4495   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4496   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4497   ierr = PetscFree(status);CHKERRQ(ierr);
4498 
4499   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4500   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4501 
4502   /* insert mat values of mpimat */
4503   /*----------------------------*/
4504   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4505   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4506 
4507   for (k=0; k<merge->nrecv; k++) {
4508     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4509     nrows       = *(buf_ri_k[k]);
4510     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4511     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4512   }
4513 
4514   /* set values of ba */
4515   m = merge->rowmap->n;
4516   for (i=0; i<m; i++) {
4517     arow = owners[rank] + i;
4518     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4519     bnzi = bi[i+1] - bi[i];
4520     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4521 
4522     /* add local non-zero vals of this proc's seqmat into ba */
4523     anzi   = ai[arow+1] - ai[arow];
4524     aj     = a->j + ai[arow];
4525     aa     = a->a + ai[arow];
4526     nextaj = 0;
4527     for (j=0; nextaj<anzi; j++) {
4528       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4529         ba_i[j] += aa[nextaj++];
4530       }
4531     }
4532 
4533     /* add received vals into ba */
4534     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4535       /* i-th row */
4536       if (i == *nextrow[k]) {
4537         anzi   = *(nextai[k]+1) - *nextai[k];
4538         aj     = buf_rj[k] + *(nextai[k]);
4539         aa     = abuf_r[k] + *(nextai[k]);
4540         nextaj = 0;
4541         for (j=0; nextaj<anzi; j++) {
4542           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4543             ba_i[j] += aa[nextaj++];
4544           }
4545         }
4546         nextrow[k]++; nextai[k]++;
4547       }
4548     }
4549     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4550   }
4551   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4553 
4554   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4555   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4556   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4557   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4558   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4559   PetscFunctionReturn(0);
4560 }
4561 
4562 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4563 {
4564   PetscErrorCode      ierr;
4565   Mat                 B_mpi;
4566   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4567   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4568   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4569   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4570   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4571   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4572   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4573   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4574   MPI_Status          *status;
4575   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4576   PetscBT             lnkbt;
4577   Mat_Merge_SeqsToMPI *merge;
4578   PetscContainer      container;
4579 
4580   PetscFunctionBegin;
4581   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4582 
4583   /* make sure it is a PETSc comm */
4584   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4585   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4586   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4587 
4588   ierr = PetscNew(&merge);CHKERRQ(ierr);
4589   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4590 
4591   /* determine row ownership */
4592   /*---------------------------------------------------------*/
4593   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4594   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4595   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4596   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4597   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4598   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4599   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4600 
4601   m      = merge->rowmap->n;
4602   owners = merge->rowmap->range;
4603 
4604   /* determine the number of messages to send, their lengths */
4605   /*---------------------------------------------------------*/
4606   len_s = merge->len_s;
4607 
4608   len          = 0; /* length of buf_si[] */
4609   merge->nsend = 0;
4610   for (proc=0; proc<size; proc++) {
4611     len_si[proc] = 0;
4612     if (proc == rank) {
4613       len_s[proc] = 0;
4614     } else {
4615       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4616       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4617     }
4618     if (len_s[proc]) {
4619       merge->nsend++;
4620       nrows = 0;
4621       for (i=owners[proc]; i<owners[proc+1]; i++) {
4622         if (ai[i+1] > ai[i]) nrows++;
4623       }
4624       len_si[proc] = 2*(nrows+1);
4625       len         += len_si[proc];
4626     }
4627   }
4628 
4629   /* determine the number and length of messages to receive for ij-structure */
4630   /*-------------------------------------------------------------------------*/
4631   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4632   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4633 
4634   /* post the Irecv of j-structure */
4635   /*-------------------------------*/
4636   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4637   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4638 
4639   /* post the Isend of j-structure */
4640   /*--------------------------------*/
4641   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4642 
4643   for (proc=0, k=0; proc<size; proc++) {
4644     if (!len_s[proc]) continue;
4645     i    = owners[proc];
4646     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4647     k++;
4648   }
4649 
4650   /* receives and sends of j-structure are complete */
4651   /*------------------------------------------------*/
4652   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4653   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4654 
4655   /* send and recv i-structure */
4656   /*---------------------------*/
4657   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4658   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4659 
4660   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4661   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4662   for (proc=0,k=0; proc<size; proc++) {
4663     if (!len_s[proc]) continue;
4664     /* form outgoing message for i-structure:
4665          buf_si[0]:                 nrows to be sent
4666                [1:nrows]:           row index (global)
4667                [nrows+1:2*nrows+1]: i-structure index
4668     */
4669     /*-------------------------------------------*/
4670     nrows       = len_si[proc]/2 - 1;
4671     buf_si_i    = buf_si + nrows+1;
4672     buf_si[0]   = nrows;
4673     buf_si_i[0] = 0;
4674     nrows       = 0;
4675     for (i=owners[proc]; i<owners[proc+1]; i++) {
4676       anzi = ai[i+1] - ai[i];
4677       if (anzi) {
4678         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4679         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4680         nrows++;
4681       }
4682     }
4683     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4684     k++;
4685     buf_si += len_si[proc];
4686   }
4687 
4688   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4689   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4690 
4691   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4692   for (i=0; i<merge->nrecv; i++) {
4693     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4694   }
4695 
4696   ierr = PetscFree(len_si);CHKERRQ(ierr);
4697   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4698   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4699   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4700   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4701   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4702   ierr = PetscFree(status);CHKERRQ(ierr);
4703 
4704   /* compute a local seq matrix in each processor */
4705   /*----------------------------------------------*/
4706   /* allocate bi array and free space for accumulating nonzero column info */
4707   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4708   bi[0] = 0;
4709 
4710   /* create and initialize a linked list */
4711   nlnk = N+1;
4712   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4713 
4714   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4715   len  = ai[owners[rank+1]] - ai[owners[rank]];
4716   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4717 
4718   current_space = free_space;
4719 
4720   /* determine symbolic info for each local row */
4721   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4722 
4723   for (k=0; k<merge->nrecv; k++) {
4724     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4725     nrows       = *buf_ri_k[k];
4726     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4727     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4728   }
4729 
4730   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4731   len  = 0;
4732   for (i=0; i<m; i++) {
4733     bnzi = 0;
4734     /* add local non-zero cols of this proc's seqmat into lnk */
4735     arow  = owners[rank] + i;
4736     anzi  = ai[arow+1] - ai[arow];
4737     aj    = a->j + ai[arow];
4738     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4739     bnzi += nlnk;
4740     /* add received col data into lnk */
4741     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4742       if (i == *nextrow[k]) { /* i-th row */
4743         anzi  = *(nextai[k]+1) - *nextai[k];
4744         aj    = buf_rj[k] + *nextai[k];
4745         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4746         bnzi += nlnk;
4747         nextrow[k]++; nextai[k]++;
4748       }
4749     }
4750     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4751 
4752     /* if free space is not available, make more free space */
4753     if (current_space->local_remaining<bnzi) {
4754       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4755       nspacedouble++;
4756     }
4757     /* copy data into free space, then initialize lnk */
4758     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4759     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4760 
4761     current_space->array           += bnzi;
4762     current_space->local_used      += bnzi;
4763     current_space->local_remaining -= bnzi;
4764 
4765     bi[i+1] = bi[i] + bnzi;
4766   }
4767 
4768   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4769 
4770   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4771   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4772   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4773 
4774   /* create symbolic parallel matrix B_mpi */
4775   /*---------------------------------------*/
4776   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4777   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4778   if (n==PETSC_DECIDE) {
4779     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4780   } else {
4781     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4782   }
4783   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4784   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4785   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4786   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4787   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4788 
4789   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4790   B_mpi->assembled    = PETSC_FALSE;
4791   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4792   merge->bi           = bi;
4793   merge->bj           = bj;
4794   merge->buf_ri       = buf_ri;
4795   merge->buf_rj       = buf_rj;
4796   merge->coi          = NULL;
4797   merge->coj          = NULL;
4798   merge->owners_co    = NULL;
4799 
4800   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4801 
4802   /* attach the supporting struct to B_mpi for reuse */
4803   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4804   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4805   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4806   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4807   *mpimat = B_mpi;
4808 
4809   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4810   PetscFunctionReturn(0);
4811 }
4812 
4813 /*@C
4814       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4815                  matrices from each processor
4816 
4817     Collective on MPI_Comm
4818 
4819    Input Parameters:
4820 +    comm - the communicator the parallel matrix will live on
4821 .    seqmat - the input sequential matrices
4822 .    m - number of local rows (or PETSC_DECIDE)
4823 .    n - number of local columns (or PETSC_DECIDE)
4824 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4825 
4826    Output Parameter:
4827 .    mpimat - the parallel matrix generated
4828 
4829     Level: advanced
4830 
4831    Notes:
4832      The dimensions of the sequential matrix in each processor MUST be the same.
4833      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4834      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4835 @*/
4836 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4837 {
4838   PetscErrorCode ierr;
4839   PetscMPIInt    size;
4840 
4841   PetscFunctionBegin;
4842   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4843   if (size == 1) {
4844     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4845     if (scall == MAT_INITIAL_MATRIX) {
4846       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4847     } else {
4848       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4849     }
4850     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4851     PetscFunctionReturn(0);
4852   }
4853   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4854   if (scall == MAT_INITIAL_MATRIX) {
4855     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4856   }
4857   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4858   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4859   PetscFunctionReturn(0);
4860 }
4861 
4862 /*@
4863      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4864           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4865           with MatGetSize()
4866 
4867     Not Collective
4868 
4869    Input Parameters:
4870 +    A - the matrix
4871 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4872 
4873    Output Parameter:
4874 .    A_loc - the local sequential matrix generated
4875 
4876     Level: developer
4877 
4878 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4879 
4880 @*/
4881 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4882 {
4883   PetscErrorCode ierr;
4884   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4885   Mat_SeqAIJ     *mat,*a,*b;
4886   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4887   MatScalar      *aa,*ba,*cam;
4888   PetscScalar    *ca;
4889   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4890   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4891   PetscBool      match;
4892   MPI_Comm       comm;
4893   PetscMPIInt    size;
4894 
4895   PetscFunctionBegin;
4896   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4897   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4898   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4900   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4901 
4902   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4903   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4904   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4905   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4906   aa = a->a; ba = b->a;
4907   if (scall == MAT_INITIAL_MATRIX) {
4908     if (size == 1) {
4909       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4910       PetscFunctionReturn(0);
4911     }
4912 
4913     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4914     ci[0] = 0;
4915     for (i=0; i<am; i++) {
4916       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4917     }
4918     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4919     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4920     k    = 0;
4921     for (i=0; i<am; i++) {
4922       ncols_o = bi[i+1] - bi[i];
4923       ncols_d = ai[i+1] - ai[i];
4924       /* off-diagonal portion of A */
4925       for (jo=0; jo<ncols_o; jo++) {
4926         col = cmap[*bj];
4927         if (col >= cstart) break;
4928         cj[k]   = col; bj++;
4929         ca[k++] = *ba++;
4930       }
4931       /* diagonal portion of A */
4932       for (j=0; j<ncols_d; j++) {
4933         cj[k]   = cstart + *aj++;
4934         ca[k++] = *aa++;
4935       }
4936       /* off-diagonal portion of A */
4937       for (j=jo; j<ncols_o; j++) {
4938         cj[k]   = cmap[*bj++];
4939         ca[k++] = *ba++;
4940       }
4941     }
4942     /* put together the new matrix */
4943     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4944     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4945     /* Since these are PETSc arrays, change flags to free them as necessary. */
4946     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4947     mat->free_a  = PETSC_TRUE;
4948     mat->free_ij = PETSC_TRUE;
4949     mat->nonew   = 0;
4950   } else if (scall == MAT_REUSE_MATRIX) {
4951     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4952     ci = mat->i; cj = mat->j; cam = mat->a;
4953     for (i=0; i<am; i++) {
4954       /* off-diagonal portion of A */
4955       ncols_o = bi[i+1] - bi[i];
4956       for (jo=0; jo<ncols_o; jo++) {
4957         col = cmap[*bj];
4958         if (col >= cstart) break;
4959         *cam++ = *ba++; bj++;
4960       }
4961       /* diagonal portion of A */
4962       ncols_d = ai[i+1] - ai[i];
4963       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4964       /* off-diagonal portion of A */
4965       for (j=jo; j<ncols_o; j++) {
4966         *cam++ = *ba++; bj++;
4967       }
4968     }
4969   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4970   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4971   PetscFunctionReturn(0);
4972 }
4973 
4974 /*@C
4975      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4976 
4977     Not Collective
4978 
4979    Input Parameters:
4980 +    A - the matrix
4981 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4982 -    row, col - index sets of rows and columns to extract (or NULL)
4983 
4984    Output Parameter:
4985 .    A_loc - the local sequential matrix generated
4986 
4987     Level: developer
4988 
4989 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4990 
4991 @*/
4992 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4993 {
4994   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4995   PetscErrorCode ierr;
4996   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4997   IS             isrowa,iscola;
4998   Mat            *aloc;
4999   PetscBool      match;
5000 
5001   PetscFunctionBegin;
5002   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5003   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5004   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5005   if (!row) {
5006     start = A->rmap->rstart; end = A->rmap->rend;
5007     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5008   } else {
5009     isrowa = *row;
5010   }
5011   if (!col) {
5012     start = A->cmap->rstart;
5013     cmap  = a->garray;
5014     nzA   = a->A->cmap->n;
5015     nzB   = a->B->cmap->n;
5016     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5017     ncols = 0;
5018     for (i=0; i<nzB; i++) {
5019       if (cmap[i] < start) idx[ncols++] = cmap[i];
5020       else break;
5021     }
5022     imark = i;
5023     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5024     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5025     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5026   } else {
5027     iscola = *col;
5028   }
5029   if (scall != MAT_INITIAL_MATRIX) {
5030     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5031     aloc[0] = *A_loc;
5032   }
5033   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5034   *A_loc = aloc[0];
5035   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5036   if (!row) {
5037     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5038   }
5039   if (!col) {
5040     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5041   }
5042   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5043   PetscFunctionReturn(0);
5044 }
5045 
5046 /*@C
5047     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5048 
5049     Collective on Mat
5050 
5051    Input Parameters:
5052 +    A,B - the matrices in mpiaij format
5053 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5054 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5055 
5056    Output Parameter:
5057 +    rowb, colb - index sets of rows and columns of B to extract
5058 -    B_seq - the sequential matrix generated
5059 
5060     Level: developer
5061 
5062 @*/
5063 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5064 {
5065   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5066   PetscErrorCode ierr;
5067   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5068   IS             isrowb,iscolb;
5069   Mat            *bseq=NULL;
5070 
5071   PetscFunctionBegin;
5072   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5073     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5074   }
5075   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5076 
5077   if (scall == MAT_INITIAL_MATRIX) {
5078     start = A->cmap->rstart;
5079     cmap  = a->garray;
5080     nzA   = a->A->cmap->n;
5081     nzB   = a->B->cmap->n;
5082     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5083     ncols = 0;
5084     for (i=0; i<nzB; i++) {  /* row < local row index */
5085       if (cmap[i] < start) idx[ncols++] = cmap[i];
5086       else break;
5087     }
5088     imark = i;
5089     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5090     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5091     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5092     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5093   } else {
5094     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5095     isrowb  = *rowb; iscolb = *colb;
5096     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5097     bseq[0] = *B_seq;
5098   }
5099   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5100   *B_seq = bseq[0];
5101   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5102   if (!rowb) {
5103     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5104   } else {
5105     *rowb = isrowb;
5106   }
5107   if (!colb) {
5108     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5109   } else {
5110     *colb = iscolb;
5111   }
5112   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5113   PetscFunctionReturn(0);
5114 }
5115 
5116 /*
5117     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5118     of the OFF-DIAGONAL portion of local A
5119 
5120     Collective on Mat
5121 
5122    Input Parameters:
5123 +    A,B - the matrices in mpiaij format
5124 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5125 
5126    Output Parameter:
5127 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5128 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5129 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5130 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5131 
5132     Level: developer
5133 
5134 */
5135 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5136 {
5137   VecScatter_MPI_General *gen_to,*gen_from;
5138   PetscErrorCode         ierr;
5139   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5140   Mat_SeqAIJ             *b_oth;
5141   VecScatter             ctx;
5142   MPI_Comm               comm;
5143   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5144   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5145   PetscInt               *rvalues,*svalues;
5146   MatScalar              *b_otha,*bufa,*bufA;
5147   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5148   MPI_Request            *rwaits = NULL,*swaits = NULL;
5149   MPI_Status             *sstatus,rstatus;
5150   PetscMPIInt            jj,size;
5151   PetscInt               *cols,sbs,rbs;
5152   PetscScalar            *vals;
5153 
5154   PetscFunctionBegin;
5155   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5157 
5158   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5159     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5160   }
5161   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5162   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5163 
5164   if (size == 1) {
5165     startsj_s = NULL;
5166     bufa_ptr  = NULL;
5167     *B_oth    = NULL;
5168     PetscFunctionReturn(0);
5169   }
5170 
5171   if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */
5172     a->Mvctx_mpi1_flg = PETSC_TRUE;
5173     ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5174   }
5175   ctx = a->Mvctx_mpi1;
5176   tag = ((PetscObject)ctx)->tag;
5177 
5178   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5179   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5180   nrecvs   = gen_from->n;
5181   nsends   = gen_to->n;
5182 
5183   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5184   srow    = gen_to->indices;    /* local row index to be sent */
5185   sstarts = gen_to->starts;
5186   sprocs  = gen_to->procs;
5187   sstatus = gen_to->sstatus;
5188   sbs     = gen_to->bs;
5189   rstarts = gen_from->starts;
5190   rprocs  = gen_from->procs;
5191   rbs     = gen_from->bs;
5192 
5193   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5194   if (scall == MAT_INITIAL_MATRIX) {
5195     /* i-array */
5196     /*---------*/
5197     /*  post receives */
5198     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5199     for (i=0; i<nrecvs; i++) {
5200       rowlen = rvalues + rstarts[i]*rbs;
5201       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5202       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5203     }
5204 
5205     /* pack the outgoing message */
5206     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5207 
5208     sstartsj[0] = 0;
5209     rstartsj[0] = 0;
5210     len         = 0; /* total length of j or a array to be sent */
5211     k           = 0;
5212     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5213     for (i=0; i<nsends; i++) {
5214       rowlen = svalues + sstarts[i]*sbs;
5215       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5216       for (j=0; j<nrows; j++) {
5217         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5218         for (l=0; l<sbs; l++) {
5219           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5220 
5221           rowlen[j*sbs+l] = ncols;
5222 
5223           len += ncols;
5224           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5225         }
5226         k++;
5227       }
5228       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5229 
5230       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5231     }
5232     /* recvs and sends of i-array are completed */
5233     i = nrecvs;
5234     while (i--) {
5235       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5236     }
5237     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5238     ierr = PetscFree(svalues);CHKERRQ(ierr);
5239 
5240     /* allocate buffers for sending j and a arrays */
5241     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5242     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5243 
5244     /* create i-array of B_oth */
5245     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5246 
5247     b_othi[0] = 0;
5248     len       = 0; /* total length of j or a array to be received */
5249     k         = 0;
5250     for (i=0; i<nrecvs; i++) {
5251       rowlen = rvalues + rstarts[i]*rbs;
5252       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5253       for (j=0; j<nrows; j++) {
5254         b_othi[k+1] = b_othi[k] + rowlen[j];
5255         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5256         k++;
5257       }
5258       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5259     }
5260     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5261 
5262     /* allocate space for j and a arrrays of B_oth */
5263     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5264     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5265 
5266     /* j-array */
5267     /*---------*/
5268     /*  post receives of j-array */
5269     for (i=0; i<nrecvs; i++) {
5270       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5271       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5272     }
5273 
5274     /* pack the outgoing message j-array */
5275     k = 0;
5276     for (i=0; i<nsends; i++) {
5277       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5278       bufJ  = bufj+sstartsj[i];
5279       for (j=0; j<nrows; j++) {
5280         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5281         for (ll=0; ll<sbs; ll++) {
5282           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5283           for (l=0; l<ncols; l++) {
5284             *bufJ++ = cols[l];
5285           }
5286           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5287         }
5288       }
5289       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5290     }
5291 
5292     /* recvs and sends of j-array are completed */
5293     i = nrecvs;
5294     while (i--) {
5295       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5296     }
5297     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5298   } else if (scall == MAT_REUSE_MATRIX) {
5299     sstartsj = *startsj_s;
5300     rstartsj = *startsj_r;
5301     bufa     = *bufa_ptr;
5302     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5303     b_otha   = b_oth->a;
5304   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5305 
5306   /* a-array */
5307   /*---------*/
5308   /*  post receives of a-array */
5309   for (i=0; i<nrecvs; i++) {
5310     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5311     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5312   }
5313 
5314   /* pack the outgoing message a-array */
5315   k = 0;
5316   for (i=0; i<nsends; i++) {
5317     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5318     bufA  = bufa+sstartsj[i];
5319     for (j=0; j<nrows; j++) {
5320       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5321       for (ll=0; ll<sbs; ll++) {
5322         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5323         for (l=0; l<ncols; l++) {
5324           *bufA++ = vals[l];
5325         }
5326         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5327       }
5328     }
5329     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5330   }
5331   /* recvs and sends of a-array are completed */
5332   i = nrecvs;
5333   while (i--) {
5334     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5335   }
5336   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5337   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5338 
5339   if (scall == MAT_INITIAL_MATRIX) {
5340     /* put together the new matrix */
5341     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5342 
5343     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5344     /* Since these are PETSc arrays, change flags to free them as necessary. */
5345     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5346     b_oth->free_a  = PETSC_TRUE;
5347     b_oth->free_ij = PETSC_TRUE;
5348     b_oth->nonew   = 0;
5349 
5350     ierr = PetscFree(bufj);CHKERRQ(ierr);
5351     if (!startsj_s || !bufa_ptr) {
5352       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5353       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5354     } else {
5355       *startsj_s = sstartsj;
5356       *startsj_r = rstartsj;
5357       *bufa_ptr  = bufa;
5358     }
5359   }
5360   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5361   PetscFunctionReturn(0);
5362 }
5363 
5364 /*@C
5365   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5366 
5367   Not Collective
5368 
5369   Input Parameters:
5370 . A - The matrix in mpiaij format
5371 
5372   Output Parameter:
5373 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5374 . colmap - A map from global column index to local index into lvec
5375 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5376 
5377   Level: developer
5378 
5379 @*/
5380 #if defined(PETSC_USE_CTABLE)
5381 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5382 #else
5383 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5384 #endif
5385 {
5386   Mat_MPIAIJ *a;
5387 
5388   PetscFunctionBegin;
5389   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5390   PetscValidPointer(lvec, 2);
5391   PetscValidPointer(colmap, 3);
5392   PetscValidPointer(multScatter, 4);
5393   a = (Mat_MPIAIJ*) A->data;
5394   if (lvec) *lvec = a->lvec;
5395   if (colmap) *colmap = a->colmap;
5396   if (multScatter) *multScatter = a->Mvctx;
5397   PetscFunctionReturn(0);
5398 }
5399 
5400 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5401 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5402 #if defined(PETSC_HAVE_MKL_SPARSE)
5403 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5404 #endif
5405 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5406 #if defined(PETSC_HAVE_ELEMENTAL)
5407 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5408 #endif
5409 #if defined(PETSC_HAVE_HYPRE)
5410 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5411 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5412 #endif
5413 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5414 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5415 
5416 /*
5417     Computes (B'*A')' since computing B*A directly is untenable
5418 
5419                n                       p                          p
5420         (              )       (              )         (                  )
5421       m (      A       )  *  n (       B      )   =   m (         C        )
5422         (              )       (              )         (                  )
5423 
5424 */
5425 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5426 {
5427   PetscErrorCode ierr;
5428   Mat            At,Bt,Ct;
5429 
5430   PetscFunctionBegin;
5431   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5432   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5433   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5434   ierr = MatDestroy(&At);CHKERRQ(ierr);
5435   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5436   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5437   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5438   PetscFunctionReturn(0);
5439 }
5440 
5441 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5442 {
5443   PetscErrorCode ierr;
5444   PetscInt       m=A->rmap->n,n=B->cmap->n;
5445   Mat            Cmat;
5446 
5447   PetscFunctionBegin;
5448   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5449   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5450   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5451   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5452   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5453   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5454   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5455   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5456 
5457   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5458 
5459   *C = Cmat;
5460   PetscFunctionReturn(0);
5461 }
5462 
5463 /* ----------------------------------------------------------------*/
5464 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5465 {
5466   PetscErrorCode ierr;
5467 
5468   PetscFunctionBegin;
5469   if (scall == MAT_INITIAL_MATRIX) {
5470     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5471     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5472     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5473   }
5474   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5475   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5476   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5477   PetscFunctionReturn(0);
5478 }
5479 
5480 /*MC
5481    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5482 
5483    Options Database Keys:
5484 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5485 
5486   Level: beginner
5487 
5488 .seealso: MatCreateAIJ()
5489 M*/
5490 
5491 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5492 {
5493   Mat_MPIAIJ     *b;
5494   PetscErrorCode ierr;
5495   PetscMPIInt    size;
5496 
5497   PetscFunctionBegin;
5498   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5499 
5500   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5501   B->data       = (void*)b;
5502   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5503   B->assembled  = PETSC_FALSE;
5504   B->insertmode = NOT_SET_VALUES;
5505   b->size       = size;
5506 
5507   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5508 
5509   /* build cache for off array entries formed */
5510   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5511 
5512   b->donotstash  = PETSC_FALSE;
5513   b->colmap      = 0;
5514   b->garray      = 0;
5515   b->roworiented = PETSC_TRUE;
5516 
5517   /* stuff used for matrix vector multiply */
5518   b->lvec  = NULL;
5519   b->Mvctx = NULL;
5520 
5521   /* stuff for MatGetRow() */
5522   b->rowindices   = 0;
5523   b->rowvalues    = 0;
5524   b->getrowactive = PETSC_FALSE;
5525 
5526   /* flexible pointer used in CUSP/CUSPARSE classes */
5527   b->spptr = NULL;
5528 
5529   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5534   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5536   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5538 #if defined(PETSC_HAVE_MKL_SPARSE)
5539   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5540 #endif
5541   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5543 #if defined(PETSC_HAVE_ELEMENTAL)
5544   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5545 #endif
5546 #if defined(PETSC_HAVE_HYPRE)
5547   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5548 #endif
5549   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5554 #if defined(PETSC_HAVE_HYPRE)
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5556 #endif
5557   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5558   PetscFunctionReturn(0);
5559 }
5560 
5561 /*@C
5562      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5563          and "off-diagonal" part of the matrix in CSR format.
5564 
5565    Collective on MPI_Comm
5566 
5567    Input Parameters:
5568 +  comm - MPI communicator
5569 .  m - number of local rows (Cannot be PETSC_DECIDE)
5570 .  n - This value should be the same as the local size used in creating the
5571        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5572        calculated if N is given) For square matrices n is almost always m.
5573 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5574 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5575 .   i - row indices for "diagonal" portion of matrix
5576 .   j - column indices
5577 .   a - matrix values
5578 .   oi - row indices for "off-diagonal" portion of matrix
5579 .   oj - column indices
5580 -   oa - matrix values
5581 
5582    Output Parameter:
5583 .   mat - the matrix
5584 
5585    Level: advanced
5586 
5587    Notes:
5588        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5589        must free the arrays once the matrix has been destroyed and not before.
5590 
5591        The i and j indices are 0 based
5592 
5593        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5594 
5595        This sets local rows and cannot be used to set off-processor values.
5596 
5597        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5598        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5599        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5600        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5601        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5602        communication if it is known that only local entries will be set.
5603 
5604 .keywords: matrix, aij, compressed row, sparse, parallel
5605 
5606 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5607           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5608 @*/
5609 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5610 {
5611   PetscErrorCode ierr;
5612   Mat_MPIAIJ     *maij;
5613 
5614   PetscFunctionBegin;
5615   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5616   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5617   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5618   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5619   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5620   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5621   maij = (Mat_MPIAIJ*) (*mat)->data;
5622 
5623   (*mat)->preallocated = PETSC_TRUE;
5624 
5625   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5626   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5627 
5628   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5629   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5630 
5631   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5632   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5633   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5634   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5635 
5636   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5637   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5638   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5639   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5640   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5641   PetscFunctionReturn(0);
5642 }
5643 
5644 /*
5645     Special version for direct calls from Fortran
5646 */
5647 #include <petsc/private/fortranimpl.h>
5648 
/*
   The Fortran-callable routine that follows is a void function, so the error macros cannot return an
   error code; redefine them to abort instead. The message arguments of the SETERRQ variants are dropped.
*/
#undef CHKERRQ
#undef SETERRQ
#undef SETERRQ2
#undef SETERRQ3
#define CHKERRQ(code)                        CHKERRABORT(PETSC_COMM_WORLD,code)
#define SETERRQ(comm,code,msg)               CHKERRABORT(comm,code)
#define SETERRQ2(comm,code,msg,a1,a2)        CHKERRABORT(comm,code)
#define SETERRQ3(comm,code,msg,a1,a2,a3)     CHKERRABORT(comm,code)

/* Map the stub name onto the symbol spelling configured for the Fortran compiler;
   when trailing underscores are used the name is already correct */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
5665 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5666 {
5667   Mat            mat  = *mmat;
5668   PetscInt       m    = *mm, n = *mn;
5669   InsertMode     addv = *maddv;
5670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5671   PetscScalar    value;
5672   PetscErrorCode ierr;
5673 
5674   MatCheckPreallocated(mat,1);
5675   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5676 
5677 #if defined(PETSC_USE_DEBUG)
5678   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5679 #endif
5680   {
5681     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5682     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5683     PetscBool roworiented = aij->roworiented;
5684 
5685     /* Some Variables required in the macro */
5686     Mat        A                 = aij->A;
5687     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5688     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5689     MatScalar  *aa               = a->a;
5690     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5691     Mat        B                 = aij->B;
5692     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5693     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5694     MatScalar  *ba               = b->a;
5695 
5696     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5697     PetscInt  nonew = a->nonew;
5698     MatScalar *ap1,*ap2;
5699 
5700     PetscFunctionBegin;
5701     for (i=0; i<m; i++) {
5702       if (im[i] < 0) continue;
5703 #if defined(PETSC_USE_DEBUG)
5704       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5705 #endif
5706       if (im[i] >= rstart && im[i] < rend) {
5707         row      = im[i] - rstart;
5708         lastcol1 = -1;
5709         rp1      = aj + ai[row];
5710         ap1      = aa + ai[row];
5711         rmax1    = aimax[row];
5712         nrow1    = ailen[row];
5713         low1     = 0;
5714         high1    = nrow1;
5715         lastcol2 = -1;
5716         rp2      = bj + bi[row];
5717         ap2      = ba + bi[row];
5718         rmax2    = bimax[row];
5719         nrow2    = bilen[row];
5720         low2     = 0;
5721         high2    = nrow2;
5722 
5723         for (j=0; j<n; j++) {
5724           if (roworiented) value = v[i*n+j];
5725           else value = v[i+j*m];
5726           if (in[j] >= cstart && in[j] < cend) {
5727             col = in[j] - cstart;
5728             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5729             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5730           } else if (in[j] < 0) continue;
5731 #if defined(PETSC_USE_DEBUG)
5732           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5733 #endif
5734           else {
5735             if (mat->was_assembled) {
5736               if (!aij->colmap) {
5737                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5738               }
5739 #if defined(PETSC_USE_CTABLE)
5740               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5741               col--;
5742 #else
5743               col = aij->colmap[in[j]] - 1;
5744 #endif
5745               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5746               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5747                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5748                 col  =  in[j];
5749                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5750                 B     = aij->B;
5751                 b     = (Mat_SeqAIJ*)B->data;
5752                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5753                 rp2   = bj + bi[row];
5754                 ap2   = ba + bi[row];
5755                 rmax2 = bimax[row];
5756                 nrow2 = bilen[row];
5757                 low2  = 0;
5758                 high2 = nrow2;
5759                 bm    = aij->B->rmap->n;
5760                 ba    = b->a;
5761               }
5762             } else col = in[j];
5763             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5764           }
5765         }
5766       } else if (!aij->donotstash) {
5767         if (roworiented) {
5768           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5769         } else {
5770           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5771         }
5772       }
5773     }
5774   }
5775   PetscFunctionReturnVoid();
5776 }
5777 
5778