xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 07250d77144fbbd7145c7dd573a0c2dc873dfd16)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity; a minimal usage sketch follows this block.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to using
22    inodes when enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
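
/*
   A minimal usage sketch (not part of the manual page above); comm, m, n, M, N, d_nz,
   and o_nz are placeholders for the communicator, local/global sizes, and preallocation
   counts that the caller would supply.  Calling both preallocation routines lets the
   same code run unchanged on one process or on many:

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,d_nz,NULL);
      MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
*/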
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
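/* Returns in keptrows the global indices of the locally owned rows that contain at least
   one stored nonzero value in either the diagonal (A) or off-diagonal (B) block; if no
   process has an all-zero row, *keptrows is left NULL. */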
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
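/* Computes the requested norm of every global column: each process accumulates the
   contributions of its diagonal (A) and off-diagonal (B) blocks into a work array of
   global length, the arrays are combined across processes with an Allreduce (MAX for the
   infinity norm, SUM otherwise), and for the 2-norm a square root is taken at the end. */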
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
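
/*
   A hypothetical usage sketch: gmat is the sequential matrix whose values on rank 0 are to
   be distributed, m is the number of rows this process is to own, and dmat receives the
   distributed matrix; a second call with MAT_REUSE_MATRIX only moves the numerical values
   over again.

      Mat dmat;
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);
*/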
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* count the off-diagonal entries in each row (olens) and how many of them lie left of the diagonal block (ld) */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* count the off-diagonal entries in each row (olens) and how many of them lie left of the diagonal block (ld) */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it, it is not scalable (each process
403 holds an order-N integer array) but access is fast.  For example, with garray = {3,9,12}, global column 9 maps to local column 1 (stored as 2, shifted by one so that a zero entry can mean "not present").
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
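/* Inserts or adds one value into the diagonal block A of a local row: a short binary
   search locates the column, an existing entry is updated in place, otherwise the row is
   grown (reallocating with MatSeqXAIJReallocateAIJ if needed) and later entries are
   shifted up to make room. */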
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
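/* Same insertion logic as the macro above, applied to the off-diagonal block B. */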
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
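/* Replaces an entire locally owned row of a square matrix with the values in v, which
   must be ordered by global column: entries to the left of the diagonal block go into B,
   the diagonal-block entries into A, and the remaining entries back into B. */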
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
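/* Values destined for locally owned rows are inserted directly into the diagonal (A) or
   off-diagonal (B) block using the macros above; values for rows owned by other processes
   are stashed and communicated during MatAssemblyBegin/End. */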
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
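/* Starts communicating the off-process entries that were stashed by MatSetValues(); the
   values themselves are received and inserted in MatAssemblyEnd_MPIAIJ(). */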
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselves, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
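/* Zeros the listed rows and the corresponding columns (global numbering), placing diag on
   the diagonal of the zeroed rows; if x and b are provided, b is adjusted by the
   prescribed solution values of the eliminated columns. */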
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added into yy until the VecScatterEnd() on the next line */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1042 {
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054 
1055   PetscFunctionBegin;
1056   /* do nondiagonal part */
1057   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1058   /* send it on its way */
1059   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   /* do local part */
1061   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   /* receive remote parts */
1063   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1095 {
1096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1105   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_CTABLE)
1108   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1109 #else
1110   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1114   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1115   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1116   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1117   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1118   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1119 
1120   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1129 #if defined(PETSC_HAVE_ELEMENTAL)
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1131 #endif
1132 #if defined(PETSC_HAVE_HYPRE)
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1135 #endif
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1143   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1144   PetscErrorCode ierr;
1145   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1146   int            fd;
1147   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1148   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1149   PetscScalar    *column_values;
1150   PetscInt       message_count,flowcontrolcount;
1151   FILE           *file;
1152 
1153   PetscFunctionBegin;
1154   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1155   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1156   nz   = A->nz + B->nz;
1157   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1158   if (!rank) {
1159     header[0] = MAT_FILE_CLASSID;
1160     header[1] = mat->rmap->N;
1161     header[2] = mat->cmap->N;
1162 
1163     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1164     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1165     /* get largest number of rows any processor has */
1166     rlen  = mat->rmap->n;
1167     range = mat->rmap->range;
1168     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1169   } else {
1170     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171     rlen = mat->rmap->n;
1172   }
1173 
1174   /* load up the local row counts */
1175   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1176   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1177 
1178   /* store the row lengths to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1184       rlen = range[i+1] - range[i];
1185       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1191     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1192     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1195 
1196   /* load up the local column indices */
1197   nzmax = nz; /* the root process needs as much space as the largest amount needed on any process */
1198   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1199   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1200   cnt   = 0;
1201   for (i=0; i<mat->rmap->n; i++) {
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ((col = garray[B->j[j]]) > cstart) break;
1204       column_indices[cnt++] = col;
1205     }
1206     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1207     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1208   }
1209   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1210 
1211   /* store the column indices to the file */
1212   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1213   if (!rank) {
1214     MPI_Status status;
1215     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1216     for (i=1; i<size; i++) {
1217       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1218       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1219       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1220       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the local column values */
1233   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<mat->rmap->n; i++) {
1236     for (j=B->i[i]; j<B->i[i+1]; j++) {
1237       if (garray[B->j[j]] > cstart) break;
1238       column_values[cnt++] = B->a[j];
1239     }
1240     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1241     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1242   }
1243   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1244 
1245   /* store the column values to the file */
1246   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1247   if (!rank) {
1248     MPI_Status status;
1249     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1250     for (i=1; i<size; i++) {
1251       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1252       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1253       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1254       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1255       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1256     }
1257     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1258   } else {
1259     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1260     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1263   }
1264   ierr = PetscFree(column_values);CHKERRQ(ierr);
1265 
1266   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1267   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1268   PetscFunctionReturn(0);
1269 }
1270 
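/*
   Usage sketch (illustrative, not part of the original source): a matrix written with the
   binary viewer above can be read back with MatLoad(); the file name "matrix.dat" is a
   placeholder.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
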
1271 #include <petscdraw.h>
1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1273 {
1274   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1275   PetscErrorCode    ierr;
1276   PetscMPIInt       rank = aij->rank,size = aij->size;
1277   PetscBool         isdraw,iascii,isbinary;
1278   PetscViewer       sviewer;
1279   PetscViewerFormat format;
1280 
1281   PetscFunctionBegin;
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1283   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1284   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1285   if (iascii) {
1286     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1287     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1288       MatInfo   info;
1289       PetscInt  *inodes;
1290 
1291       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1292       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1295       if (!inodes) {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1298       } else {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       }
1302       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1306       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1309       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1312       PetscInt inodecount,inodelimit,*inodes;
1313       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1314       if (inodes) {
1315         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1316       } else {
1317         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1318       }
1319       PetscFunctionReturn(0);
1320     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1321       PetscFunctionReturn(0);
1322     }
1323   } else if (isbinary) {
1324     if (size == 1) {
1325       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1326       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1327     } else {
1328       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1329     }
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1336     if (isnull) PetscFunctionReturn(0);
1337   }
1338 
1339   {
1340     /* assemble the entire matrix onto first processor. */
1341     Mat        A;
1342     Mat_SeqAIJ *Aloc;
1343     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1344     MatScalar  *a;
1345 
1346     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1347     if (!rank) {
1348       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1351     }
1352     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1353     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1354     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1355     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1356     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1357 
1358     /* copy over the A part */
1359     Aloc = (Mat_SeqAIJ*)aij->A->data;
1360     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1361     row  = mat->rmap->rstart;
1362     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1363     for (i=0; i<m; i++) {
1364       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1365       row++;
1366       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1367     }
1368     aj = Aloc->j;
1369     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1370 
1371     /* copy over the B part */
1372     Aloc = (Mat_SeqAIJ*)aij->B->data;
1373     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1374     row  = mat->rmap->rstart;
1375     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1376     ct   = cols;
1377     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1378     for (i=0; i<m; i++) {
1379       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1380       row++;
1381       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1382     }
1383     ierr = PetscFree(ct);CHKERRQ(ierr);
1384     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1385     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1386     /*
1387        Every process has to participate in the call that draws the matrix, since the
1388        graphics waits are synchronized across all processes that share the PetscDraw object
1389     */
1390     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1391     if (!rank) {
1392       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1393       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1394     }
1395     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1396     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1397     ierr = MatDestroy(&A);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1403 {
1404   PetscErrorCode ierr;
1405   PetscBool      iascii,isdraw,issocket,isbinary;
1406 
1407   PetscFunctionBegin;
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1410   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1412   if (iascii || isdraw || isbinary || issocket) {
1413     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1414   }
1415   PetscFunctionReturn(0);
1416 }
1417 
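/*
   Usage sketch (illustrative): the dispatch routine above serves ASCII, draw, socket and
   binary viewers alike, so both of the following end up here.

     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
   or, from the command line after assembly,
     -mat_view          (ASCII)
     -mat_view draw     (graphical nonzero pattern)
*/
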
1418 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1419 {
1420   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1421   PetscErrorCode ierr;
1422   Vec            bb1 = 0;
1423   PetscBool      hasop;
1424 
1425   PetscFunctionBegin;
1426   if (flag == SOR_APPLY_UPPER) {
1427     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1428     PetscFunctionReturn(0);
1429   }
1430 
1431   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1432     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1433   }
1434 
1435   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1436     if (flag & SOR_ZERO_INITIAL_GUESS) {
1437       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1438       its--;
1439     }
1440 
1441     while (its--) {
1442       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1444 
1445       /* update rhs: bb1 = bb - B*x */
1446       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1447       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1448 
1449       /* local sweep */
1450       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1451     }
1452   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1453     if (flag & SOR_ZERO_INITIAL_GUESS) {
1454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455       its--;
1456     }
1457     while (its--) {
1458       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460 
1461       /* update rhs: bb1 = bb - B*x */
1462       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1463       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1464 
1465       /* local sweep */
1466       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1467     }
1468   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1469     if (flag & SOR_ZERO_INITIAL_GUESS) {
1470       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471       its--;
1472     }
1473     while (its--) {
1474       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1475       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476 
1477       /* update rhs: bb1 = bb - B*x */
1478       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1479       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1480 
1481       /* local sweep */
1482       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1483     }
1484   } else if (flag & SOR_EISENSTAT) {
1485     Vec xx1;
1486 
1487     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1488     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1489 
1490     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492     if (!mat->diag) {
1493       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1494       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1495     }
1496     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1497     if (hasop) {
1498       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1499     } else {
1500       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1501     }
1502     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1503 
1504     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1505 
1506     /* local sweep */
1507     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1508     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1509     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1510   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1511 
1512   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1513 
1514   matin->factorerrortype = mat->A->factorerrortype;
1515   PetscFunctionReturn(0);
1516 }
1517 
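/*
   Usage sketch (illustrative): MatSOR() above is normally reached through the SOR
   preconditioner, whose local-sweep variants select the SOR_LOCAL_* branches, e.g.

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
   together with command-line options such as -pc_sor_local_symmetric or -pc_sor_its 2.
*/
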
1518 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1519 {
1520   Mat            aA,aB,Aperm;
1521   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1522   PetscScalar    *aa,*ba;
1523   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1524   PetscSF        rowsf,sf;
1525   IS             parcolp = NULL;
1526   PetscBool      done;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1531   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1532   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1533   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1534 
1535   /* Invert row permutation to find out where my rows should go */
1536   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1537   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1538   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1539   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1540   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1541   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1542 
1543   /* Invert column permutation to find out where my columns should go */
1544   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1545   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1546   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1547   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1548   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1549   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1550   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1551 
1552   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1553   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1554   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1555 
1556   /* Find out where my gcols should go */
1557   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1558   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1562   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1563   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1564   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1565 
1566   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1567   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1568   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1569   for (i=0; i<m; i++) {
1570     PetscInt row = rdest[i],rowner;
1571     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1572     for (j=ai[i]; j<ai[i+1]; j++) {
1573       PetscInt cowner,col = cdest[aj[j]];
1574       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1575       if (rowner == cowner) dnnz[i]++;
1576       else onnz[i]++;
1577     }
1578     for (j=bi[i]; j<bi[i+1]; j++) {
1579       PetscInt cowner,col = gcdest[bj[j]];
1580       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1581       if (rowner == cowner) dnnz[i]++;
1582       else onnz[i]++;
1583     }
1584   }
1585   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1587   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1588   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1589   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1590 
1591   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1592   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1593   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) {
1595     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1596     PetscInt j0,rowlen;
1597     rowlen = ai[i+1] - ai[i];
1598     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the scratch arrays of length m, so insert the values in batches */
1599       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1600       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1601     }
1602     rowlen = bi[i+1] - bi[i];
1603     for (j0=j=0; j<rowlen; j0=j) {
1604       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1605       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1606     }
1607   }
1608   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1609   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1610   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1612   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1613   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1614   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1616   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1617   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1618   *B = Aperm;
1619   PetscFunctionReturn(0);
1620 }
1621 
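/*
   Usage sketch (illustrative): permuting rows and columns of a parallel AIJ matrix; rowperm
   and colperm are index sets describing a permutation of the global numbering.

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);   /* when no longer needed */
*/
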
1622 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1623 {
1624   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1625   PetscErrorCode ierr;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1629   if (ghosts) *ghosts = aij->garray;
1630   PetscFunctionReturn(0);
1631 }
1632 
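/*
   Usage sketch (illustrative): the ghosts returned here are the global indices of the
   off-process columns, e.g. for building a ghosted vector compatible with the matrix.

     PetscInt       nghosts,n;
     const PetscInt *ghosts;
     Vec            x;
     ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);
     ierr = MatGetLocalSize(mat,NULL,&n);CHKERRQ(ierr);
     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),n,PETSC_DETERMINE,nghosts,ghosts,&x);CHKERRQ(ierr);
*/
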
1633 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1634 {
1635   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1636   Mat            A    = mat->A,B = mat->B;
1637   PetscErrorCode ierr;
1638   PetscReal      isend[5],irecv[5];
1639 
1640   PetscFunctionBegin;
1641   info->block_size = 1.0;
1642   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1643 
1644   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1645   isend[3] = info->memory;  isend[4] = info->mallocs;
1646 
1647   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1648 
1649   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1650   isend[3] += info->memory;  isend[4] += info->mallocs;
1651   if (flag == MAT_LOCAL) {
1652     info->nz_used      = isend[0];
1653     info->nz_allocated = isend[1];
1654     info->nz_unneeded  = isend[2];
1655     info->memory       = isend[3];
1656     info->mallocs      = isend[4];
1657   } else if (flag == MAT_GLOBAL_MAX) {
1658     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1659 
1660     info->nz_used      = irecv[0];
1661     info->nz_allocated = irecv[1];
1662     info->nz_unneeded  = irecv[2];
1663     info->memory       = irecv[3];
1664     info->mallocs      = irecv[4];
1665   } else if (flag == MAT_GLOBAL_SUM) {
1666     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1667 
1668     info->nz_used      = irecv[0];
1669     info->nz_allocated = irecv[1];
1670     info->nz_unneeded  = irecv[2];
1671     info->memory       = irecv[3];
1672     info->mallocs      = irecv[4];
1673   }
1674   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1675   info->fill_ratio_needed = 0;
1676   info->factor_mallocs    = 0;
1677   PetscFunctionReturn(0);
1678 }
1679 
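/*
   Usage sketch (illustrative): querying the assembled storage statistics gathered above.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/
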
1680 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1681 {
1682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   switch (op) {
1687   case MAT_NEW_NONZERO_LOCATIONS:
1688   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1689   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1690   case MAT_KEEP_NONZERO_PATTERN:
1691   case MAT_NEW_NONZERO_LOCATION_ERR:
1692   case MAT_USE_INODES:
1693   case MAT_IGNORE_ZERO_ENTRIES:
1694     MatCheckPreallocated(A,1);
1695     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1696     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1697     break;
1698   case MAT_ROW_ORIENTED:
1699     MatCheckPreallocated(A,1);
1700     a->roworiented = flg;
1701 
1702     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1703     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1704     break;
1705   case MAT_NEW_DIAGONALS:
1706     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   case MAT_SPD:
1712     A->spd_set = PETSC_TRUE;
1713     A->spd     = flg;
1714     if (flg) {
1715       A->symmetric                  = PETSC_TRUE;
1716       A->structurally_symmetric     = PETSC_TRUE;
1717       A->symmetric_set              = PETSC_TRUE;
1718       A->structurally_symmetric_set = PETSC_TRUE;
1719     }
1720     break;
1721   case MAT_SYMMETRIC:
1722     MatCheckPreallocated(A,1);
1723     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_STRUCTURALLY_SYMMETRIC:
1726     MatCheckPreallocated(A,1);
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_HERMITIAN:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_SYMMETRY_ETERNAL:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_SUBMAT_SINGLEIS:
1738     A->submat_singleis = flg;
1739     break;
1740   case MAT_STRUCTURE_ONLY:
1741     /* The option is handled directly by MatSetOption() */
1742     break;
1743   default:
1744     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
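/*
   Usage sketch (illustrative): options are applied locally or forwarded to the sequential
   diagonal (A) and off-diagonal (B) blocks as handled above, e.g.

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
*/
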
1749 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1750 {
1751   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1752   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1753   PetscErrorCode ierr;
1754   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1755   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1756   PetscInt       *cmap,*idx_p;
1757 
1758   PetscFunctionBegin;
1759   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1760   mat->getrowactive = PETSC_TRUE;
1761 
1762   if (!mat->rowvalues && (idx || v)) {
1763     /*
1764         allocate enough space to hold information from the longest row.
1765     */
1766     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1767     PetscInt   max = 1,tmp;
1768     for (i=0; i<matin->rmap->n; i++) {
1769       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1770       if (max < tmp) max = tmp;
1771     }
1772     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1773   }
1774 
1775   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1776   lrow = row - rstart;
1777 
1778   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1779   if (!v)   {pvA = 0; pvB = 0;}
1780   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1781   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1782   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1783   nztot = nzA + nzB;
1784 
1785   cmap = mat->garray;
1786   if (v  || idx) {
1787     if (nztot) {
1788       /* Sort by increasing column numbers, assuming A and B already sorted */
1789       PetscInt imark = -1;
1790       if (v) {
1791         *v = v_p = mat->rowvalues;
1792         for (i=0; i<nzB; i++) {
1793           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1794           else break;
1795         }
1796         imark = i;
1797         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1798         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1799       }
1800       if (idx) {
1801         *idx = idx_p = mat->rowindices;
1802         if (imark > -1) {
1803           for (i=0; i<imark; i++) {
1804             idx_p[i] = cmap[cworkB[i]];
1805           }
1806         } else {
1807           for (i=0; i<nzB; i++) {
1808             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1809             else break;
1810           }
1811           imark = i;
1812         }
1813         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1814         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1815       }
1816     } else {
1817       if (idx) *idx = 0;
1818       if (v)   *v   = 0;
1819     }
1820   }
1821   *nz  = nztot;
1822   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1823   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1828 {
1829   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1830 
1831   PetscFunctionBegin;
1832   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1833   aij->getrowactive = PETSC_FALSE;
1834   PetscFunctionReturn(0);
1835 }
1836 
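/*
   Usage sketch (illustrative): iterating over the locally owned rows with the
   MatGetRow()/MatRestoreRow() pair implemented above; only local rows may be requested.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use ncols, cols[], vals[] ... */
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
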
1837 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1838 {
1839   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1840   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1841   PetscErrorCode ierr;
1842   PetscInt       i,j,cstart = mat->cmap->rstart;
1843   PetscReal      sum = 0.0;
1844   MatScalar      *v;
1845 
1846   PetscFunctionBegin;
1847   if (aij->size == 1) {
1848     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1849   } else {
1850     if (type == NORM_FROBENIUS) {
1851       v = amat->a;
1852       for (i=0; i<amat->nz; i++) {
1853         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1854       }
1855       v = bmat->a;
1856       for (i=0; i<bmat->nz; i++) {
1857         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1858       }
1859       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1860       *norm = PetscSqrtReal(*norm);
1861       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1862     } else if (type == NORM_1) { /* max column norm */
1863       PetscReal *tmp,*tmp2;
1864       PetscInt  *jj,*garray = aij->garray;
1865       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1866       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1867       *norm = 0.0;
1868       v     = amat->a; jj = amat->j;
1869       for (j=0; j<amat->nz; j++) {
1870         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1871       }
1872       v = bmat->a; jj = bmat->j;
1873       for (j=0; j<bmat->nz; j++) {
1874         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1875       }
1876       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1877       for (j=0; j<mat->cmap->N; j++) {
1878         if (tmp2[j] > *norm) *norm = tmp2[j];
1879       }
1880       ierr = PetscFree(tmp);CHKERRQ(ierr);
1881       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1882       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1883     } else if (type == NORM_INFINITY) { /* max row norm */
1884       PetscReal ntemp = 0.0;
1885       for (j=0; j<aij->A->rmap->n; j++) {
1886         v   = amat->a + amat->i[j];
1887         sum = 0.0;
1888         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1889           sum += PetscAbsScalar(*v); v++;
1890         }
1891         v = bmat->a + bmat->i[j];
1892         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         if (sum > ntemp) ntemp = sum;
1896       }
1897       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1899     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
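/*
   Usage sketch (illustrative): the three norms supported above; NORM_2 is not implemented
   for this matrix type.

     PetscReal nrm1,nrmf,nrminf;
     ierr = MatNorm(mat,NORM_1,&nrm1);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
*/
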
1904 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1905 {
1906   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1907   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1908   PetscErrorCode ierr;
1909   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1910   PetscInt       cstart = A->cmap->rstart,ncol;
1911   Mat            B;
1912   MatScalar      *array;
1913 
1914   PetscFunctionBegin;
1915   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1916 
1917   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1918   ai = Aloc->i; aj = Aloc->j;
1919   bi = Bloc->i; bj = Bloc->j;
1920   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1921     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1922     PetscSFNode          *oloc;
1923     PETSC_UNUSED PetscSF sf;
1924 
1925     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1926     /* compute d_nnz for preallocation */
1927     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1928     for (i=0; i<ai[ma]; i++) {
1929       d_nnz[aj[i]]++;
1930       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1931     }
1932     /* compute local off-diagonal contributions */
1933     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1934     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1935     /* map those to global */
1936     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1937     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1938     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1939     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1940     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1941     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1942     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1943 
1944     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1945     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1946     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1947     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1948     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1949     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1950   } else {
1951     B    = *matout;
1952     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1953     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954   }
1955 
1956   /* copy over the A part */
1957   array = Aloc->a;
1958   row   = A->rmap->rstart;
1959   for (i=0; i<ma; i++) {
1960     ncol = ai[i+1]-ai[i];
1961     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1962     row++;
1963     array += ncol; aj += ncol;
1964   }
1965   aj = Aloc->j;
1966   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1967 
1968   /* copy over the B part */
1969   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1970   array = Bloc->a;
1971   row   = A->rmap->rstart;
1972   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1973   cols_tmp = cols;
1974   for (i=0; i<mb; i++) {
1975     ncol = bi[i+1]-bi[i];
1976     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++;
1978     array += ncol; cols_tmp += ncol;
1979   }
1980   ierr = PetscFree(cols);CHKERRQ(ierr);
1981 
1982   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1985     *matout = B;
1986   } else {
1987     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
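/*
   Usage sketch (illustrative): out-of-place and in-place transposition as supported above;
   the in-place form requires a square matrix.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
*/
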
1992 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1993 {
1994   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1995   Mat            a    = aij->A,b = aij->B;
1996   PetscErrorCode ierr;
1997   PetscInt       s1,s2,s3;
1998 
1999   PetscFunctionBegin;
2000   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2001   if (rr) {
2002     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2003     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2004     /* Overlap communication with computation. */
2005     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2006   }
2007   if (ll) {
2008     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2009     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2010     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2011   }
2012   /* scale  the diagonal block */
2013   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2014 
2015   if (rr) {
2016     /* Do a scatter end and then right scale the off-diagonal block */
2017     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2019   }
2020   PetscFunctionReturn(0);
2021 }
2022 
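/*
   Usage sketch (illustrative): mat is overwritten by diag(ll) * mat * diag(rr); passing
   NULL for either vector skips that side, as the checks above allow.

     ierr = MatDiagonalScale(mat,ll,NULL);CHKERRQ(ierr);   /* row scaling only */
     ierr = MatDiagonalScale(mat,NULL,rr);CHKERRQ(ierr);   /* column scaling only */
*/
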
2023 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2024 {
2025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2034 {
2035   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2036   Mat            a,b,c,d;
2037   PetscBool      flg;
2038   PetscErrorCode ierr;
2039 
2040   PetscFunctionBegin;
2041   a = matA->A; b = matA->B;
2042   c = matB->A; d = matB->B;
2043 
2044   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2045   if (flg) {
2046     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2047   }
2048   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2053 {
2054   PetscErrorCode ierr;
2055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2056   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2057 
2058   PetscFunctionBegin;
2059   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2060   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2061     /* Because of the column compression in the off-process part of the matrix a->B,
2062        the number of columns in a->B and b->B may differ, hence we cannot call
2063        MatCopy() directly on the two parts. If need be, a copy more efficient than
2064        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2065        and then copying the submatrices. */
2066     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2067   } else {
2068     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2069     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2070   }
2071   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2076 {
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 /*
2085    Computes the number of nonzeros per row needed for preallocation when X and Y
2086    have different nonzero structure.
2087 */
2088 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2089 {
2090   PetscInt       i,j,k,nzx,nzy;
2091 
2092   PetscFunctionBegin;
2093   /* Set the number of nonzeros in the new matrix */
2094   for (i=0; i<m; i++) {
2095     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2096     nzx = xi[i+1] - xi[i];
2097     nzy = yi[i+1] - yi[i];
2098     nnz[i] = 0;
2099     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2100       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2101       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2102       nnz[i]++;
2103     }
2104     for (; k<nzy; k++) nnz[i]++;
2105   }
2106   PetscFunctionReturn(0);
2107 }
2108 
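/*
   Worked example (illustrative) of the merge count below: if row i of X has global
   columns {1,4,7} and row i of Y has {2,4,9}, the union is {1,2,4,7,9}, so nnz[i] = 5;
   the shared column 4 is counted only once.
*/
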
2109 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2110 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2111 {
2112   PetscErrorCode ierr;
2113   PetscInt       m = Y->rmap->N;
2114   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2115   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2116 
2117   PetscFunctionBegin;
2118   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x    = (Mat_SeqAIJ*)xx->A->data;
2133     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2134     y    = (Mat_SeqAIJ*)yy->A->data;
2135     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2136     x    = (Mat_SeqAIJ*)xx->B->data;
2137     y    = (Mat_SeqAIJ*)yy->B->data;
2138     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2141   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2142     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2143   } else {
2144     Mat      B;
2145     PetscInt *nnz_d,*nnz_o;
2146     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2147     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2148     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2149     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2150     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2151     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2152     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2153     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2154     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2155     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2156     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2157     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2158     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2159     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2160   }
2161   PetscFunctionReturn(0);
2162 }
2163 
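/*
   Usage sketch (illustrative): Y = a*X + Y; the MatStructure argument selects which of the
   branches above is taken.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      /* fast BLAS axpy path */
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* repreallocates Y */
*/
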
2164 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2165 
2166 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2167 {
2168 #if defined(PETSC_USE_COMPLEX)
2169   PetscErrorCode ierr;
2170   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2174   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2175 #else
2176   PetscFunctionBegin;
2177 #endif
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2182 {
2183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2188   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2193 {
2194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2199   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207   PetscInt       i,*idxb = 0;
2208   PetscScalar    *va,*vb;
2209   Vec            vtmp;
2210 
2211   PetscFunctionBegin;
2212   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2213   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2214   if (idx) {
2215     for (i=0; i<A->rmap->n; i++) {
2216       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2217     }
2218   }
2219 
2220   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2221   if (idx) {
2222     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2223   }
2224   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2225   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2226 
2227   for (i=0; i<A->rmap->n; i++) {
2228     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2229       va[i] = vb[i];
2230       if (idx) idx[i] = a->garray[idxb[i]];
2231     }
2232   }
2233 
2234   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2235   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2236   ierr = PetscFree(idxb);CHKERRQ(ierr);
2237   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
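/*
   Usage sketch (illustrative): v[i] receives the largest absolute value in local row i and
   idx[i] (optional) the global column where it occurs.

     Vec      v;
     PetscInt m,*idx;
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);        /* vector conforming to the rows */
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/
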
2241 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2242 {
2243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2244   PetscErrorCode ierr;
2245   PetscInt       i,*idxb = 0;
2246   PetscScalar    *va,*vb;
2247   Vec            vtmp;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2251   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2252   if (idx) {
2253     for (i=0; i<A->rmap->n; i++) {
2254       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2255     }
2256   }
2257 
2258   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2259   if (idx) {
2260     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2261   }
2262   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2263   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2264 
2265   for (i=0; i<A->rmap->n; i++) {
2266     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2267       va[i] = vb[i];
2268       if (idx) idx[i] = a->garray[idxb[i]];
2269     }
2270   }
2271 
2272   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2273   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2274   ierr = PetscFree(idxb);CHKERRQ(ierr);
2275   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2280 {
2281   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2282   PetscInt       n      = A->rmap->n;
2283   PetscInt       cstart = A->cmap->rstart;
2284   PetscInt       *cmap  = mat->garray;
2285   PetscInt       *diagIdx, *offdiagIdx;
2286   Vec            diagV, offdiagV;
2287   PetscScalar    *a, *diagA, *offdiagA;
2288   PetscInt       r;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2293   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2294   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2295   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2296   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2297   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2298   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2299   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2300   for (r = 0; r < n; ++r) {
2301     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2302       a[r]   = diagA[r];
2303       idx[r] = cstart + diagIdx[r];
2304     } else {
2305       a[r]   = offdiagA[r];
2306       idx[r] = cmap[offdiagIdx[r]];
2307     }
2308   }
2309   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2310   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2311   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2312   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2313   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2314   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2319 {
2320   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2321   PetscInt       n      = A->rmap->n;
2322   PetscInt       cstart = A->cmap->rstart;
2323   PetscInt       *cmap  = mat->garray;
2324   PetscInt       *diagIdx, *offdiagIdx;
2325   Vec            diagV, offdiagV;
2326   PetscScalar    *a, *diagA, *offdiagA;
2327   PetscInt       r;
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2332   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2333   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2334   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2335   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2337   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2338   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2339   for (r = 0; r < n; ++r) {
2340     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2341       a[r]   = diagA[r];
2342       idx[r] = cstart + diagIdx[r];
2343     } else {
2344       a[r]   = offdiagA[r];
2345       idx[r] = cmap[offdiagIdx[r]];
2346     }
2347   }
2348   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2349   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2350   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2351   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2352   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2353   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2358 {
2359   PetscErrorCode ierr;
2360   Mat            *dummy;
2361 
2362   PetscFunctionBegin;
2363   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2364   *newmat = *dummy;
2365   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2370 {
2371   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2376   A->factorerrortype = a->A->factorerrortype;
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2381 {
2382   PetscErrorCode ierr;
2383   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2387   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2394 {
2395   PetscFunctionBegin;
2396   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2397   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 /*@
2402    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2403 
2404    Collective on Mat
2405 
2406    Input Parameters:
2407 +    A - the matrix
2408 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2409 
2410    Level: advanced
2411 
2412 @*/
2413 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2414 {
2415   PetscErrorCode       ierr;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
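/*
   Usage sketch (illustrative): either call the routine directly or use the option read by
   MatSetFromOptions_MPIAIJ() below.

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   or on the command line
     -mat_increase_overlap_scalable
*/
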
2422 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2423 {
2424   PetscErrorCode       ierr;
2425   PetscBool            sc = PETSC_FALSE,flg;
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2429   ierr = PetscObjectOptionsBegin((PetscObject)A);
2430     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2431     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2432     if (flg) {
2433       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2434     }
2435   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2440 {
2441   PetscErrorCode ierr;
2442   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2443   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2444 
2445   PetscFunctionBegin;
2446   if (!Y->preallocated) {
2447     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2448   } else if (!aij->nz) {
2449     PetscInt nonew = aij->nonew;
2450     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2451     aij->nonew = nonew;
2452   }
2453   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
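/*
   Usage sketch (illustrative): Y = Y + a*I; as handled above, an unpreallocated or empty
   diagonal block is first given one nonzero per row.

     ierr = MatShift(Y,3.0);CHKERRQ(ierr);
*/
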
2457 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2458 {
2459   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2464   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2465   if (d) {
2466     PetscInt rstart;
2467     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2468     *d += rstart;
2469 
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 
2475 /* -------------------------------------------------------------------*/
2476 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2477                                        MatGetRow_MPIAIJ,
2478                                        MatRestoreRow_MPIAIJ,
2479                                        MatMult_MPIAIJ,
2480                                 /* 4*/ MatMultAdd_MPIAIJ,
2481                                        MatMultTranspose_MPIAIJ,
2482                                        MatMultTransposeAdd_MPIAIJ,
2483                                        0,
2484                                        0,
2485                                        0,
2486                                 /*10*/ 0,
2487                                        0,
2488                                        0,
2489                                        MatSOR_MPIAIJ,
2490                                        MatTranspose_MPIAIJ,
2491                                 /*15*/ MatGetInfo_MPIAIJ,
2492                                        MatEqual_MPIAIJ,
2493                                        MatGetDiagonal_MPIAIJ,
2494                                        MatDiagonalScale_MPIAIJ,
2495                                        MatNorm_MPIAIJ,
2496                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2497                                        MatAssemblyEnd_MPIAIJ,
2498                                        MatSetOption_MPIAIJ,
2499                                        MatZeroEntries_MPIAIJ,
2500                                 /*24*/ MatZeroRows_MPIAIJ,
2501                                        0,
2502                                        0,
2503                                        0,
2504                                        0,
2505                                 /*29*/ MatSetUp_MPIAIJ,
2506                                        0,
2507                                        0,
2508                                        MatGetDiagonalBlock_MPIAIJ,
2509                                        0,
2510                                 /*34*/ MatDuplicate_MPIAIJ,
2511                                        0,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                 /*39*/ MatAXPY_MPIAIJ,
2516                                        MatCreateSubMatrices_MPIAIJ,
2517                                        MatIncreaseOverlap_MPIAIJ,
2518                                        MatGetValues_MPIAIJ,
2519                                        MatCopy_MPIAIJ,
2520                                 /*44*/ MatGetRowMax_MPIAIJ,
2521                                        MatScale_MPIAIJ,
2522                                        MatShift_MPIAIJ,
2523                                        MatDiagonalSet_MPIAIJ,
2524                                        MatZeroRowsColumns_MPIAIJ,
2525                                 /*49*/ MatSetRandom_MPIAIJ,
2526                                        0,
2527                                        0,
2528                                        0,
2529                                        0,
2530                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2531                                        0,
2532                                        MatSetUnfactored_MPIAIJ,
2533                                        MatPermute_MPIAIJ,
2534                                        0,
2535                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2536                                        MatDestroy_MPIAIJ,
2537                                        MatView_MPIAIJ,
2538                                        0,
2539                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2540                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2541                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2546                                        MatGetRowMinAbs_MPIAIJ,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                 /*75*/ MatFDColoringApply_AIJ,
2552                                        MatSetFromOptions_MPIAIJ,
2553                                        0,
2554                                        0,
2555                                        MatFindZeroDiagonals_MPIAIJ,
2556                                 /*80*/ 0,
2557                                        0,
2558                                        0,
2559                                 /*83*/ MatLoad_MPIAIJ,
2560                                        MatIsSymmetric_MPIAIJ,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                        0,
2565                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2566                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2567                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2568                                        MatPtAP_MPIAIJ_MPIAIJ,
2569                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2570                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*99*/ 0,
2576                                        0,
2577                                        0,
2578                                        MatConjugate_MPIAIJ,
2579                                        0,
2580                                 /*104*/MatSetValuesRow_MPIAIJ,
2581                                        MatRealPart_MPIAIJ,
2582                                        MatImaginaryPart_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                 /*109*/0,
2586                                        0,
2587                                        MatGetRowMin_MPIAIJ,
2588                                        0,
2589                                        MatMissingDiagonal_MPIAIJ,
2590                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2591                                        0,
2592                                        MatGetGhosts_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                 /*119*/0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        MatGetMultiProcBlock_MPIAIJ,
2600                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2601                                        MatGetColumnNorms_MPIAIJ,
2602                                        MatInvertBlockDiagonal_MPIAIJ,
2603                                        0,
2604                                        MatCreateSubMatricesMPI_MPIAIJ,
2605                                 /*129*/0,
2606                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2607                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2608                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2609                                        0,
2610                                 /*134*/0,
2611                                        0,
2612                                        MatRARt_MPIAIJ_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*139*/MatSetBlockSizes_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        MatFDColoringSetUp_MPIXAIJ,
2619                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2620                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2621 };
2622 
2623 /* ----------------------------------------------------------------------------------------*/
2624 
2625 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2626 {
2627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2628   PetscErrorCode ierr;
2629 
2630   PetscFunctionBegin;
2631   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2632   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2637 {
2638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2639   PetscErrorCode ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2643   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2648 {
2649   Mat_MPIAIJ     *b;
2650   PetscErrorCode ierr;
2651 
2652   PetscFunctionBegin;
2653   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2654   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2655   b = (Mat_MPIAIJ*)B->data;
2656 
2657 #if defined(PETSC_USE_CTABLE)
2658   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2659 #else
2660   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2661 #endif
2662   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2663   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2664   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2665 
2666   /* Because B will have been resized we simply destroy it and create a new one each time */
2667   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2668   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2669   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2670   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2671   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2672   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2673 
2674   if (!B->preallocated) {
2675     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2676     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2677     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2678     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2679     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2680   }
2681 
2682   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2683   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2684   B->preallocated  = PETSC_TRUE;
2685   B->was_assembled = PETSC_FALSE;
2686   B->assembled     = PETSC_FALSE;
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2691 {
2692   Mat_MPIAIJ     *b;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2697   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2698   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2699   b = (Mat_MPIAIJ*)B->data;
2700 
2701 #if defined(PETSC_USE_CTABLE)
2702   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2703 #else
2704   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2705 #endif
2706   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2707   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2708   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2709 
2710   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2711   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2712   B->preallocated  = PETSC_TRUE;
2713   B->was_assembled = PETSC_FALSE;
2714   B->assembled = PETSC_FALSE;
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2719 {
2720   Mat            mat;
2721   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2722   PetscErrorCode ierr;
2723 
2724   PetscFunctionBegin;
2725   *newmat = 0;
2726   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2727   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2728   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2729   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2730   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2731   a       = (Mat_MPIAIJ*)mat->data;
2732 
2733   mat->factortype   = matin->factortype;
2734   mat->assembled    = PETSC_TRUE;
2735   mat->insertmode   = NOT_SET_VALUES;
2736   mat->preallocated = PETSC_TRUE;
2737 
2738   a->size         = oldmat->size;
2739   a->rank         = oldmat->rank;
2740   a->donotstash   = oldmat->donotstash;
2741   a->roworiented  = oldmat->roworiented;
2742   a->rowindices   = 0;
2743   a->rowvalues    = 0;
2744   a->getrowactive = PETSC_FALSE;
2745 
2746   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2747   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2748 
2749   if (oldmat->colmap) {
2750 #if defined(PETSC_USE_CTABLE)
2751     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2752 #else
2753     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2754     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2755     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2756 #endif
2757   } else a->colmap = 0;
2758   if (oldmat->garray) {
2759     PetscInt len;
2760     len  = oldmat->B->cmap->n;
2761     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2762     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2763     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2764   } else a->garray = 0;
2765 
2766   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2767   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2768   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2769   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2770   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2771   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2772   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2773   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2774   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2775   *newmat = mat;
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2780 {
2781   PetscScalar    *vals,*svals;
2782   MPI_Comm       comm;
2783   PetscErrorCode ierr;
2784   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2785   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2786   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2787   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2788   PetscInt       cend,cstart,n,*rowners;
2789   int            fd;
2790   PetscInt       bs = newMat->rmap->bs;
2791 
2792   PetscFunctionBegin;
2793   /* force binary viewer to load .info file if it has not yet done so */
2794   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2795   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2796   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2798   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2801     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2802     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2803   }
2804 
2805   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2806   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808   if (bs < 0) bs = 1;
2809 
2810   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   M    = header[1]; N = header[2];
2812 
2813   /* If global sizes are set, check if they are consistent with that given in the file */
2814   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2815   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2816 
2817   /* determine ownership of all (block) rows */
2818   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2819   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2820   else m = newMat->rmap->n; /* Set by user */
2821 
2822   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2823   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2824 
2825   /* First process needs enough room for process with most rows */
2826   if (!rank) {
2827     mmax = rowners[1];
2828     for (i=2; i<=size; i++) {
2829       mmax = PetscMax(mmax, rowners[i]);
2830     }
2831   } else mmax = -1;             /* unused, but compilers complain */
2832 
2833   rowners[0] = 0;
2834   for (i=2; i<=size; i++) {
2835     rowners[i] += rowners[i-1];
2836   }
2837   rstart = rowners[rank];
2838   rend   = rowners[rank+1];
2839 
2840   /* distribute row lengths to all processors */
2841   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2842   if (!rank) {
2843     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2844     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2845     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2846     for (j=0; j<m; j++) {
2847       procsnz[0] += ourlens[j];
2848     }
2849     for (i=1; i<size; i++) {
2850       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2851       /* calculate the number of nonzeros on each processor */
2852       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2853         procsnz[i] += rowlengths[j];
2854       }
2855       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2858   } else {
2859     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2860   }
2861 
2862   if (!rank) {
2863     /* determine max buffer needed and allocate it */
2864     maxnz = 0;
2865     for (i=0; i<size; i++) {
2866       maxnz = PetscMax(maxnz,procsnz[i]);
2867     }
2868     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2869 
2870     /* read in my part of the matrix column indices  */
2871     nz   = procsnz[0];
2872     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2873     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2874 
2875     /* read in everyone else's and ship off */
2876     for (i=1; i<size; i++) {
2877       nz   = procsnz[i];
2878       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2879       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2880     }
2881     ierr = PetscFree(cols);CHKERRQ(ierr);
2882   } else {
2883     /* determine buffer space needed for message */
2884     nz = 0;
2885     for (i=0; i<m; i++) {
2886       nz += ourlens[i];
2887     }
2888     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2889 
2890     /* receive message of column indices*/
2891     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   /* determine column ownership if matrix is not square */
2895   if (N != M) {
2896     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2897     else n = newMat->cmap->n;
2898     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2899     cstart = cend - n;
2900   } else {
2901     cstart = rstart;
2902     cend   = rend;
2903     n      = cend - cstart;
2904   }
2905 
2906   /* loop over local rows, determining number of off diagonal entries */
2907   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2908   jj   = 0;
2909   for (i=0; i<m; i++) {
2910     for (j=0; j<ourlens[i]; j++) {
2911       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2912       jj++;
2913     }
2914   }
2915 
2916   for (i=0; i<m; i++) {
2917     ourlens[i] -= offlens[i];
2918   }
2919   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2920 
2921   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2922 
2923   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2924 
2925   for (i=0; i<m; i++) {
2926     ourlens[i] += offlens[i];
2927   }
2928 
2929   if (!rank) {
2930     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2931 
2932     /* read in my part of the matrix numerical values  */
2933     nz   = procsnz[0];
2934     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935 
2936     /* insert into matrix */
2937     jj      = rstart;
2938     smycols = mycols;
2939     svals   = vals;
2940     for (i=0; i<m; i++) {
2941       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2942       smycols += ourlens[i];
2943       svals   += ourlens[i];
2944       jj++;
2945     }
2946 
2947     /* read in other processors and ship out */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2954   } else {
2955     /* receive numeric values */
2956     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* receive message of values*/
2959     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971   }
2972   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2973   ierr = PetscFree(vals);CHKERRQ(ierr);
2974   ierr = PetscFree(mycols);CHKERRQ(ierr);
2975   ierr = PetscFree(rowners);CHKERRQ(ierr);
2976   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 /* Not scalable because of ISAllGather() unless getting all columns. */
2982 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2983 {
2984   PetscErrorCode ierr;
2985   IS             iscol_local;
2986   PetscBool      isstride;
2987   PetscMPIInt    lisstride=0,gisstride;
2988 
2989   PetscFunctionBegin;
2990   /* check if we are grabbing all columns*/
2991   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2992 
2993   if (isstride) {
2994     PetscInt  start,len,mstart,mlen;
2995     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2996     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2997     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2998     if (mstart == start && mlen-mstart == len) lisstride = 1;
2999   }
3000 
3001   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3002   if (gisstride) {
3003     PetscInt N;
3004     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3005     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3006     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3007     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3008   } else {
3009     PetscInt cbs;
3010     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3011     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3012     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3013   }
3014 
3015   *isseq = iscol_local;
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 /*
3020  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3021  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3022 
3023  Input Parameters:
3024    mat - matrix
3025    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3026            i.e., mat->rstart <= isrow[i] < mat->rend
3027    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3028            i.e., mat->cstart <= iscol[i] < mat->cend
3029  Output Parameter:
3030    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3031    iscol_o - sequential column index set for retrieving mat->B
3032    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3033  */
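/*
 An illustrative (hypothetical) example of the output, not taken from any PETSc test: if
 iscol = {1,3,7,8} globally and, on this process, columns 7 and 8 live in the off-process part B
 of mat, then iscol_o selects the local columns of B that hold 7 and 8, and garray = {2,3},
 i.e. the positions of those columns within iscol and hence their column indices in the submatrix.
*/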
3034 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3035 {
3036   PetscErrorCode ierr;
3037   Vec            x,cmap;
3038   const PetscInt *is_idx;
3039   PetscScalar    *xarray,*cmaparray;
3040   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3041   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3042   Mat            B=a->B;
3043   Vec            lvec=a->lvec,lcmap;
3044   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3045   MPI_Comm       comm;
3046   PetscMPIInt    rank;
3047   VecScatter     Mvctx;
3048 
3049   PetscFunctionBegin;
3050   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3052   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3053 
3054   //ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3055   //ierr = ISView(iscol,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3056 
3057   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3058   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3059   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3060   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3061   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3062 
3063   /* Get start indices */
3064   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3065   isstart -= ncols;
3066   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3067 
3068   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3069   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3070   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3071   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3072   for (i=0; i<ncols; i++) {
3073     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3074     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3075     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3076   }
3077   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3078   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3079   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3080 
3081   /* Get iscol_d */
3082   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3083   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3084   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3085 
3086   /* Get isrow_d */
3087   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3088   rstart = mat->rmap->rstart;
3089   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3090   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3091   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3092   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3093 
3094   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3095   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3096   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3097 
3098   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3099   Mvctx = a->Mvctx;
3100   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3101   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3102 
3103   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3104 
3105   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3106   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3107 
3108   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3109   /* off-process column indices */
3110   count = 0;
3111   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3112   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3113 
3114   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3115   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3116   for (i=0; i<Bn; i++) {
3117     if (PetscRealPart(xarray[i]) > -1.0) {
3118       idx[count]     = i;                   /* local column index in off-diagonal part B */
3119       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3120       count++;
3121     }
3122   }
3123   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3124   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3125 
3126   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3127   /* cannot ensure iscol_o has same blocksize as iscol! */
3128 
3129   ierr = PetscFree(idx);CHKERRQ(ierr);
3130   *garray = cmap1;
3131 
3132   ierr = VecDestroy(&x);CHKERRQ(ierr);
3133   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3134   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 
3138 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3139 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3140 {
3141   PetscErrorCode ierr;
3142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3143   Mat            M = NULL;
3144   MPI_Comm       comm;
3145   IS             iscol_d,isrow_d,iscol_o;
3146   Mat            Asub = NULL,Bsub = NULL;
3147   PetscInt       n;
3148 
3149   PetscFunctionBegin;
3150   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3151 
3152   if (call == MAT_REUSE_MATRIX) {
3153     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3154     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3155     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3156 
3157     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3158     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3159 
3160     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3161     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3162 
3163     /* Update diagonal and off-diagonal portions of submat */
3164     asub = (Mat_MPIAIJ*)(*submat)->data;
3165     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3166     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3167     if (n) {
3168       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3169     }
3170     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3171     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3172 
3173   } else { /* call == MAT_INITIAL_MATRIX) */
3174     const PetscInt *garray;
3175     PetscInt        BsubN;
3176 
3177     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3178     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3179 
3180     /* Create local submatrices Asub and Bsub */
3181     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3182     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3183 
3184     /* Create submatrix M */
3185     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3186 
3187     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3188     asub = (Mat_MPIAIJ*)M->data;
3189 
3190     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3191     n = asub->B->cmap->N;
3192     if (BsubN > n) {
3193       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3194       const PetscInt *idx;
3195       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3196       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3197 
3198       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3199       j = 0;
3200       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3201       for (i=0; i<n; i++) {
3202         if (j >= BsubN) break;
3203         while (subgarray[i] > garray[j]) j++;
3204 
3205         if (subgarray[i] == garray[j]) {
3206           idx_new[i] = idx[j++];
3207         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3208       }
3209       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3210 
3211       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3212       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3213 
3214     } else if (BsubN < n) {
3215       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N);
3216     }
3217 
3218     ierr = PetscFree(garray);CHKERRQ(ierr);
3219     *submat = M;
3220 
3221     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3222     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3223     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3224 
3225     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3226     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3227 
3228     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3229     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3230   }
3231   PetscFunctionReturn(0);
3232 }
3233 
3234 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3235 {
3236   PetscErrorCode ierr;
3237   IS             iscol_local=NULL,isrow_d;
3238   PetscInt       csize;
3239   PetscInt       n,i,j,start,end;
3240   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3241   MPI_Comm       comm;
3242 
3243   PetscFunctionBegin;
3244   /* If isrow has same processor distribution as mat,
3245      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3246   if (call == MAT_REUSE_MATRIX) {
3247     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3248     if (isrow_d) {
3249       sameRowDist  = PETSC_TRUE;
3250       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3251     } else {
3252       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3253       if (iscol_local) {
3254         sameRowDist  = PETSC_TRUE;
3255         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3256       }
3257     }
3258   } else {
3259     /* Check if isrow has same processor distribution as mat */
3260     sameDist[0] = PETSC_FALSE;
3261     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3262     if (!n) {
3263       sameDist[0] = PETSC_TRUE;
3264     } else {
3265       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3266       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3267       if (i >= start && j < end) {
3268         sameDist[0] = PETSC_TRUE;
3269       }
3270     }
3271 
3272     /* Check if iscol has same processor distribution as mat */
3273     sameDist[1] = PETSC_FALSE;
3274     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3275     if (!n) {
3276       sameDist[1] = PETSC_TRUE;
3277     } else {
3278       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3279       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3280       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3281     }
3282 
3283     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3284     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3285     sameRowDist = tsameDist[0];
3286   }
3287 
3288   if (sameRowDist) {
3289     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3290       /* isrow and iscol have same processor distribution as mat */
3291       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3292       PetscFunctionReturn(0);
3293     } else { /* sameRowDist */
3294       /* isrow has same processor distribution as mat */
3295       if (call == MAT_INITIAL_MATRIX) {
3296         PetscBool sorted;
3297         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3298         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3299         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3300         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3301 
3302         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3303         if (sorted) {
3304           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3305           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3306           PetscFunctionReturn(0);
3307         }
3308       } else { /* call == MAT_REUSE_MATRIX */
3309         IS    iscol_sub;
3310         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3311         if (iscol_sub) {
3312           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3313           PetscFunctionReturn(0);
3314         }
3315       }
3316     }
3317   }
3318 
3319   /* General case: iscol -> iscol_local which has global size of iscol */
3320   if (call == MAT_REUSE_MATRIX) {
3321     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3322     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3323   } else {
3324     if (!iscol_local) {
3325       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3326     }
3327   }
3328 
3329   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3330   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3331 
3332   if (call == MAT_INITIAL_MATRIX) {
3333     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3334     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3335   }
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 /*@C
3340      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3341          and "off-diagonal" part of the matrix in CSR format.
3342 
3343    Collective on MPI_Comm
3344 
3345    Input Parameters:
3346 +  comm - MPI communicator
3347 .  A - "diagonal" portion of matrix
3348 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3349 -  garray - global index of B columns
3350 
3351    Output Parameter:
3352 .   mat - the matrix, with input A as its local diagonal matrix

3353    Level: advanced
3354 
3355    Notes:
3356        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3357        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3358 
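       A hedged usage sketch (Adiag, Boff, and garray are hypothetical names; Adiag and Boff are
    assumed to be assembled SeqAIJ matrices with the same number of local rows, and garray maps the
    compact column space of Boff back to global column indices):

$      Mat      Adiag,Boff,C;
$      PetscInt *garray;    /* garray[k] = global column index of column k of Boff */
$      /* ... build Adiag, Boff, and garray ... */
$      MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boff,garray,&C);
$      /* Adiag is now owned by C; Boff was destroyed by the call */
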
3359 .seealso: MatCreateMPIAIJWithSplitArrays()
3360 @*/
3361 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3362 {
3363   PetscErrorCode ierr;
3364   Mat_MPIAIJ     *maij;
3365   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3366   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3367   PetscScalar    *oa=b->a;
3368   Mat            Bnew;
3369   PetscInt       m,n,N;
3370 
3371   PetscFunctionBegin;
3372   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3373   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3374   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3375   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3376   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3377   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3378 
3379   /* Get global columns of mat */
3380   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3381 
3382   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3383   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3384   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3385   maij = (Mat_MPIAIJ*)(*mat)->data;
3386 
3387   (*mat)->preallocated = PETSC_TRUE;
3388 
3389   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3390   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3391 
3392   /* Set A as diagonal portion of *mat */
3393   maij->A = A;
3394 
3395   nz = oi[m];
3396   for (i=0; i<nz; i++) {
3397     col   = oj[i];
3398     oj[i] = garray[col];
3399   }
3400 
3401    /* Set Bnew as off-diagonal portion of *mat */
3402   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3403   bnew        = (Mat_SeqAIJ*)Bnew->data;
3404   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3405   maij->B     = Bnew;
3406 
3407   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3408 
3409   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3410   b->free_a       = PETSC_FALSE;
3411   b->free_ij      = PETSC_FALSE;
3412   ierr = MatDestroy(&B);CHKERRQ(ierr);
3413 
3414   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3415   bnew->free_a       = PETSC_TRUE;
3416   bnew->free_ij      = PETSC_TRUE;
3417 
3418   /* condense columns of maij->B */
3419   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3420   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3421   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3422   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3423   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3424   PetscFunctionReturn(0);
3425 }
3426 
3427 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3428 
3429 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3430 {
3431   PetscErrorCode ierr;
3432   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3433   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3434   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3435   Mat            M,Msub,B=a->B;
3436   MatScalar      *aa;
3437   Mat_SeqAIJ     *aij;
3438   PetscInt       *garray = a->garray,*colsub,Ncols;
3439   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3440   IS             iscol_sub,iscmap;
3441   const PetscInt *is_idx,*cmap;
3442   PetscBool      allcolumns=PETSC_FALSE;
3443   MPI_Comm       comm;
3444 
3445   PetscFunctionBegin;
3446   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3447 
3448   if (call == MAT_REUSE_MATRIX) {
3449     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3450     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3451     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3452 
3453     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3454     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3455 
3456     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3457     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3458 
3459     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3460 
3461   } else { /* call == MAT_INITIAL_MATRIX) */
3462     PetscBool flg;
3463 
3464     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3465     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3466 
3467     /* (1) iscol -> nonscalable iscol_local */
3468     /* Check for special case: each processor gets entire matrix columns */
3469     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3470     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3471     if (allcolumns) {
3472       iscol_sub = iscol_local;
3473       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3474       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3475 
3476     } else {
3477       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3478       PetscInt *idx,*cmap1,k;
3479       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3480       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3481       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3482       count = 0;
3483       k     = 0;
3484       for (i=0; i<Ncols; i++) {
3485         j = is_idx[i];
3486         if (j >= cstart && j < cend) {
3487           /* diagonal part of mat */
3488           idx[count]     = j;
3489           cmap1[count++] = i; /* column index in submat */
3490         } else if (Bn) {
3491           /* off-diagonal part of mat */
3492           if (j == garray[k]) {
3493             idx[count]     = j;
3494             cmap1[count++] = i;  /* column index in submat */
3495           } else if (j > garray[k]) {
3496             while (j > garray[k] && k < Bn-1) k++;
3497             if (j == garray[k]) {
3498               idx[count]     = j;
3499               cmap1[count++] = i; /* column index in submat */
3500             }
3501           }
3502         }
3503       }
3504       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3505 
3506       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3507       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3508       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3509 
3510       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3511     }
3512 
3513     /* (3) Create sequential Msub */
3514     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3515   }
3516 
3517   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3518   aij  = (Mat_SeqAIJ*)(Msub)->data;
3519   ii   = aij->i;
3520   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3521 
3522   /*
3523       m - number of local rows
3524       Ncols - number of columns (same on all processors)
3525       rstart - first row in new global matrix generated
3526   */
3527   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3528 
3529   if (call == MAT_INITIAL_MATRIX) {
3530     /* (4) Create parallel newmat */
3531     PetscMPIInt    rank,size;
3532     PetscInt       csize;
3533 
3534     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3535     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3536 
3537     /*
3538         Determine the number of non-zeros in the diagonal and off-diagonal
3539         portions of the matrix in order to do correct preallocation
3540     */
3541 
3542     /* first get start and end of "diagonal" columns */
3543     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3544     if (csize == PETSC_DECIDE) {
3545       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3546       if (mglobal == Ncols) { /* square matrix */
3547         nlocal = m;
3548       } else {
3549         nlocal = Ncols/size + ((Ncols % size) > rank);
3550       }
3551     } else {
3552       nlocal = csize;
3553     }
3554     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3555     rstart = rend - nlocal;
3556     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3557 
3558     /* next, compute all the lengths */
3559     jj    = aij->j;
3560     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3561     olens = dlens + m;
3562     for (i=0; i<m; i++) {
3563       jend = ii[i+1] - ii[i];
3564       olen = 0;
3565       dlen = 0;
3566       for (j=0; j<jend; j++) {
3567         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3568         else dlen++;
3569         jj++;
3570       }
3571       olens[i] = olen;
3572       dlens[i] = dlen;
3573     }
3574 
3575     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3576     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3577 
3578     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3579     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3580     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3581     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3582     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3583     ierr = PetscFree(dlens);CHKERRQ(ierr);
3584 
3585   } else { /* call == MAT_REUSE_MATRIX */
3586     M    = *newmat;
3587     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3588     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3589     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3590     /*
3591          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3592        rather than the slower MatSetValues().
3593     */
3594     M->was_assembled = PETSC_TRUE;
3595     M->assembled     = PETSC_FALSE;
3596   }
3597 
3598   /* (5) Set values of Msub to *newmat */
3599   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3600   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3601 
3602   jj   = aij->j;
3603   aa   = aij->a;
3604   for (i=0; i<m; i++) {
3605     row = rstart + i;
3606     nz  = ii[i+1] - ii[i];
3607     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3608     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3609     jj += nz; aa += nz;
3610   }
3611   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3612 
3613   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3614   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3615 
3616   ierr = PetscFree(colsub);CHKERRQ(ierr);
3617 
3618   /* save Msub, iscol_sub and iscmap used in processor for next request */
3619   if (call ==  MAT_INITIAL_MATRIX) {
3620     *newmat = M;
3621     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3622     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3623 
3624     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3625     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3626 
3627     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3628     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3629 
3630     if (iscol_local) {
3631       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3632       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3633     }
3634   }
3635   PetscFunctionReturn(0);
3636 }
3637 
3638 /*
3639     Not great since it makes two copies of the submatrix: first a SeqAIJ
3640   locally, and then the end result by concatenating the local matrices.
3641   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3642 
3643   Note: This requires a sequential iscol with all indices.
3644 */
3645 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3646 {
3647   PetscErrorCode ierr;
3648   PetscMPIInt    rank,size;
3649   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3650   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3651   Mat            M,Mreuse;
3652   MatScalar      *aa,*vwork;
3653   MPI_Comm       comm;
3654   Mat_SeqAIJ     *aij;
3655   PetscBool      colflag,allcolumns=PETSC_FALSE;
3656 
3657   PetscFunctionBegin;
3658   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3659   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3661 
3662   /* Check for special case: each processor gets entire matrix columns */
3663   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3664   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3665   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3666 
3667   if (call ==  MAT_REUSE_MATRIX) {
3668     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3669     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3670     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3671   } else {
3672     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3673   }
3674 
3675   /*
3676       m - number of local rows
3677       n - number of columns (same on all processors)
3678       rstart - first row in new global matrix generated
3679   */
3680   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3681   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3682   if (call == MAT_INITIAL_MATRIX) {
3683     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3684     ii  = aij->i;
3685     jj  = aij->j;
3686 
3687     /*
3688         Determine the number of non-zeros in the diagonal and off-diagonal
3689         portions of the matrix in order to do correct preallocation
3690     */
3691 
3692     /* first get start and end of "diagonal" columns */
3693     if (csize == PETSC_DECIDE) {
3694       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3695       if (mglobal == n) { /* square matrix */
3696         nlocal = m;
3697       } else {
3698         nlocal = n/size + ((n % size) > rank);
3699       }
3700     } else {
3701       nlocal = csize;
3702     }
3703     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3704     rstart = rend - nlocal;
3705     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3706 
3707     /* next, compute all the lengths */
3708     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3709     olens = dlens + m;
3710     for (i=0; i<m; i++) {
3711       jend = ii[i+1] - ii[i];
3712       olen = 0;
3713       dlen = 0;
3714       for (j=0; j<jend; j++) {
3715         if (*jj < rstart || *jj >= rend) olen++;
3716         else dlen++;
3717         jj++;
3718       }
3719       olens[i] = olen;
3720       dlens[i] = dlen;
3721     }
3722     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3723     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3724     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3725     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3726     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3727     ierr = PetscFree(dlens);CHKERRQ(ierr);
3728   } else {
3729     PetscInt ml,nl;
3730 
3731     M    = *newmat;
3732     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3733     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3734     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3735     /*
3736          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3737        rather than the slower MatSetValues().
3738     */
3739     M->was_assembled = PETSC_TRUE;
3740     M->assembled     = PETSC_FALSE;
3741   }
3742   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3743   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3744   ii   = aij->i;
3745   jj   = aij->j;
3746   aa   = aij->a;
3747   for (i=0; i<m; i++) {
3748     row   = rstart + i;
3749     nz    = ii[i+1] - ii[i];
3750     cwork = jj;     jj += nz;
3751     vwork = aa;     aa += nz;
3752     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3753   }
3754 
3755   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3757   *newmat = M;
3758 
3759   /* save submatrix used in processor for next request */
3760   if (call ==  MAT_INITIAL_MATRIX) {
3761     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3762     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3763   }
3764   PetscFunctionReturn(0);
3765 }
3766 
3767 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3768 {
3769   PetscInt       m,cstart, cend,j,nnz,i,d;
3770   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3771   const PetscInt *JJ;
3772   PetscScalar    *values;
3773   PetscErrorCode ierr;
3774   PetscBool      nooffprocentries;
3775 
3776   PetscFunctionBegin;
3777   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3778 
3779   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3780   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3781   m      = B->rmap->n;
3782   cstart = B->cmap->rstart;
3783   cend   = B->cmap->rend;
3784   rstart = B->rmap->rstart;
3785 
3786   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3787 
3788 #if defined(PETSC_USE_DEBUG)
3789   for (i=0; i<m; i++) {
3790     nnz = Ii[i+1]- Ii[i];
3791     JJ  = J + Ii[i];
3792     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3793     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3794     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3795   }
3796 #endif
3797 
3798   for (i=0; i<m; i++) {
3799     nnz     = Ii[i+1]- Ii[i];
3800     JJ      = J + Ii[i];
3801     nnz_max = PetscMax(nnz_max,nnz);
3802     d       = 0;
3803     for (j=0; j<nnz; j++) {
3804       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3805     }
3806     d_nnz[i] = d;
3807     o_nnz[i] = nnz - d;
3808   }
3809   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3810   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3811 
3812   if (v) values = (PetscScalar*)v;
3813   else {
3814     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3815   }
3816 
3817   for (i=0; i<m; i++) {
3818     ii   = i + rstart;
3819     nnz  = Ii[i+1]- Ii[i];
3820     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3821   }
3822   nooffprocentries    = B->nooffprocentries;
3823   B->nooffprocentries = PETSC_TRUE;
3824   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3825   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3826   B->nooffprocentries = nooffprocentries;
3827 
3828   if (!v) {
3829     ierr = PetscFree(values);CHKERRQ(ierr);
3830   }
3831   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3832   PetscFunctionReturn(0);
3833 }
3834 
3835 /*@
3836    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3837    (the default parallel PETSc format).
3838 
3839    Collective on MPI_Comm
3840 
3841    Input Parameters:
3842 +  B - the matrix
3843 .  i - the indices into j for the start of each local row (starts with zero)
3844 .  j - the column indices for each local row (starts with zero)
3845 -  v - optional values in the matrix
3846 
3847    Level: developer
3848 
3849    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown below:
3859 
3860 $        1 0 0
3861 $        2 0 3     P0
3862 $       -------
3863 $        4 5 6     P1
3864 $
3865 $     Process0 [P0]: rows_owned=[0,1]
3866 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3867 $        j =  {0,0,2}  [size = 3]
3868 $        v =  {1,2,3}  [size = 3]
3869 $
3870 $     Process1 [P1]: rows_owned=[2]
3871 $        i =  {0,3}    [size = nrow+1  = 1+1]
3872 $        j =  {0,1,2}  [size = 3]
3873 $        v =  {4,5,6}  [size = 3]
3874 
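   For instance, a minimal sketch for process 0 of the example above (names are
   illustrative; process 1 passes its own i, j, v and local row count) might be:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
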
3875 .keywords: matrix, aij, compressed row, sparse, parallel
3876 
3877 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3878           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3879 @*/
3880 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3881 {
3882   PetscErrorCode ierr;
3883 
3884   PetscFunctionBegin;
3885   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3886   PetscFunctionReturn(0);
3887 }
3888 
3889 /*@C
3890    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3891    (the default parallel PETSc format).  For good matrix assembly performance
3892    the user should preallocate the matrix storage by setting the parameters
3893    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3894    performance can be increased by more than a factor of 50.
3895 
3896    Collective on MPI_Comm
3897 
3898    Input Parameters:
3899 +  B - the matrix
3900 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3901            (same value is used for all local rows)
3902 .  d_nnz - array containing the number of nonzeros in the various rows of the
3903            DIAGONAL portion of the local submatrix (possibly different for each row)
3904            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3905            The size of this array is equal to the number of local rows, i.e 'm'.
3906            For matrices that will be factored, you must leave room for (and set)
3907            the diagonal entry even if it is zero.
3908 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3909            submatrix (same value is used for all local rows).
3910 -  o_nnz - array containing the number of nonzeros in the various rows of the
3911            OFF-DIAGONAL portion of the local submatrix (possibly different for
3912            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3913            structure. The size of this array is equal to the number
3914            of local rows, i.e 'm'.
3915 
3916    If the *_nnz parameter is given then the *_nz parameter is ignored
3917 
3918    The AIJ format (also called the Yale sparse matrix format or
3919    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3920    storage.  The stored row and column indices begin with zero.
3921    See Users-Manual: ch_mat for details.
3922 
3923    The parallel matrix is partitioned such that the first m0 rows belong to
3924    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3926 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3936 
3937    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3938 
3939    You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3941    You can also run with the option -info and look for messages with the string
3942    malloc in them to see if additional memory allocation was needed.
3943 
3944    Example usage:
3945 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3948    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3949    as follows:
3950 
3951 .vb
3952             1  2  0  |  0  3  0  |  0  4
3953     Proc0   0  5  6  |  7  0  0  |  8  0
3954             9  0 10  | 11  0  0  | 12  0
3955     -------------------------------------
3956            13  0 14  | 15 16 17  |  0  0
3957     Proc1   0 18  0  | 19 20 21  |  0  0
3958             0  0  0  | 22 23  0  | 24  0
3959     -------------------------------------
3960     Proc2  25 26 27  |  0  0 28  | 29  0
3961            30  0  0  | 31 32 33  |  0 34
3962 .ve
3963 
3964    This can be represented as a collection of submatrices as:
3965 
3966 .vb
3967       A B C
3968       D E F
3969       G H I
3970 .ve
3971 
3972    Where the submatrices A,B,C are owned by proc0, D,E,F are
3973    owned by proc1, G,H,I are owned by proc2.
3974 
3975    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3976    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3977    The 'M','N' parameters are 8,8, and have the same values on all procs.
3978 
3979    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3980    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3981    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3982    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3985 
3986    When d_nz, o_nz parameters are specified, d_nz storage elements are
3987    allocated for every row of the local diagonal submatrix, and o_nz
3988    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3991    In this case, the values of d_nz,o_nz are:
3992 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3996 .ve
3997    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3998    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4000    34 values.
4001 
4002    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4004    In the above case the values for d_nnz,o_nnz are:
4005 .vb
4006      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4007      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4008      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4009 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is exact.
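
   As a rough sketch (illustrative only; error checking and value insertion omitted),
   the matrix above could be created on 3 processes with:

.vb
     Mat      A;
     PetscInt m;              [3, 3, or 2 depending on the rank, as above]
     PetscInt *d_nnz,*o_nnz;  [per-row counts as listed above, each of length m]

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   followed by the usual MatSetValues(), MatAssemblyBegin(), and MatAssemblyEnd() calls.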
4012 
4013    Level: intermediate
4014 
4015 .keywords: matrix, aij, compressed row, sparse, parallel
4016 
4017 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4018           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4019 @*/
4020 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4021 {
4022   PetscErrorCode ierr;
4023 
4024   PetscFunctionBegin;
4025   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4026   PetscValidType(B,1);
4027   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4028   PetscFunctionReturn(0);
4029 }
4030 
4031 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format.
4034 
4035    Collective on MPI_Comm
4036 
4037    Input Parameters:
4038 +  comm - MPI communicator
4039 .  m - number of local rows (Cannot be PETSC_DECIDE)
4040 .  n - This value should be the same as the local size used in creating the
4041        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4042        calculated if N is given) For square matrices n is almost always m.
4043 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4044 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4045 .   i - row indices
4046 .   j - column indices
4047 -   a - matrix values
4048 
4049    Output Parameter:
4050 .   mat - the matrix
4051 
4052    Level: intermediate
4053 
4054    Notes:
4055        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4056      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4057      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4058 
       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown below:
4064 
4065 $        1 0 0
4066 $        2 0 3     P0
4067 $       -------
4068 $        4 5 6     P1
4069 $
4070 $     Process0 [P0]: rows_owned=[0,1]
4071 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4072 $        j =  {0,0,2}  [size = 3]
4073 $        v =  {1,2,3}  [size = 3]
4074 $
4075 $     Process1 [P1]: rows_owned=[2]
4076 $        i =  {0,3}    [size = nrow+1  = 1+1]
4077 $        j =  {0,1,2}  [size = 3]
4078 $        v =  {4,5,6}  [size = 3]
4079 
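   For instance, a minimal sketch for process 0 of the example above (names are
   illustrative; process 1 passes its own arrays and local row count) might be:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
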
4080 .keywords: matrix, aij, compressed row, sparse, parallel
4081 
4082 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4083           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4084 @*/
4085 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4086 {
4087   PetscErrorCode ierr;
4088 
4089   PetscFunctionBegin;
4090   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4091   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4092   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4093   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4094   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4095   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4096   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4097   PetscFunctionReturn(0);
4098 }
4099 
4100 /*@C
4101    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4102    (the default parallel PETSc format).  For good matrix assembly performance
4103    the user should preallocate the matrix storage by setting the parameters
4104    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4105    performance can be increased by more than a factor of 50.
4106 
4107    Collective on MPI_Comm
4108 
4109    Input Parameters:
4110 +  comm - MPI communicator
4111 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4112            This value should be the same as the local size used in creating the
4113            y vector for the matrix-vector product y = Ax.
4114 .  n - This value should be the same as the local size used in creating the
4115        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4116        calculated if N is given) For square matrices n is almost always m.
4117 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4118 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4119 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4120            (same value is used for all local rows)
4121 .  d_nnz - array containing the number of nonzeros in the various rows of the
4122            DIAGONAL portion of the local submatrix (possibly different for each row)
4123            or NULL, if d_nz is used to specify the nonzero structure.
4124            The size of this array is equal to the number of local rows, i.e 'm'.
4125 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4126            submatrix (same value is used for all local rows).
4127 -  o_nnz - array containing the number of nonzeros in the various rows of the
4128            OFF-DIAGONAL portion of the local submatrix (possibly different for
4129            each row) or NULL, if o_nz is used to specify the nonzero
4130            structure. The size of this array is equal to the number
4131            of local rows, i.e 'm'.
4132 
4133    Output Parameter:
4134 .  A - the matrix
4135 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4139 
4140    Notes:
4141    If the *_nnz parameter is given then the *_nz parameter is ignored
4142 
4143    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4144    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4145    storage requirements for this matrix.
4146 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4150 
4151    The user MUST specify either the local or global matrix dimensions
4152    (possibly both).
4153 
4154    The parallel matrix is partitioned across processors such that the
4155    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
4159 
4160    The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4163 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n owned by
   that processor, i.e. the diagonal matrix on process 0 is [m0 x n0],
   the diagonal matrix on process 1 is [m1 x n1], etc. The remaining
   portion of the local submatrix [m x (N-n)] constitutes the
   OFF-DIAGONAL portion. The example below better illustrates this
   concept.
4171 
4172    For a square global matrix we define each processor's diagonal portion
4173    to be its local rows and the corresponding columns (a square submatrix);
4174    each processor's off-diagonal portion encompasses the remainder of the
4175    local matrix (a rectangular submatrix).
4176 
4177    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4178 
4179    When calling this routine with a single process communicator, a matrix of
4180    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4181    type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve
4190 
4191    By default, this format uses inodes (identical nodes) when possible.
4192    We search for consecutive rows with the same nonzero structure, thereby
4193    reusing matrix information to achieve increased efficiency.
4194 
4195    Options Database Keys:
4196 +  -mat_no_inode  - Do not use inodes
4197 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4198 -  -mat_aij_oneindex - Internally use indexing starting at 1
4199         rather than 0.  Note that when calling MatSetValues(),
4200         the user still MUST index entries starting at 0!
4201 
4202 
4203    Example usage:
4204 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4207    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4208    as follows
4209 
4210 .vb
4211             1  2  0  |  0  3  0  |  0  4
4212     Proc0   0  5  6  |  7  0  0  |  8  0
4213             9  0 10  | 11  0  0  | 12  0
4214     -------------------------------------
4215            13  0 14  | 15 16 17  |  0  0
4216     Proc1   0 18  0  | 19 20 21  |  0  0
4217             0  0  0  | 22 23  0  | 24  0
4218     -------------------------------------
4219     Proc2  25 26 27  |  0  0 28  | 29  0
4220            30  0  0  | 31 32 33  |  0 34
4221 .ve
4222 
4223    This can be represented as a collection of submatrices as
4224 
4225 .vb
4226       A B C
4227       D E F
4228       G H I
4229 .ve
4230 
4231    Where the submatrices A,B,C are owned by proc0, D,E,F are
4232    owned by proc1, G,H,I are owned by proc2.
4233 
4234    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4235    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4236    The 'M','N' parameters are 8,8, and have the same values on all procs.
4237 
4238    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4239    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4240    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4241    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4244 
4245    When d_nz, o_nz parameters are specified, d_nz storage elements are
4246    allocated for every row of the local diagonal submatrix, and o_nz
4247    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4250    In this case, the values of d_nz,o_nz are
4251 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4255 .ve
4256    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4257    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4259    34 values.
4260 
4261    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4263    In the above case the values for d_nnz,o_nnz are
4264 .vb
4265      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4266      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4267      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4268 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is exact.
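
   As a rough sketch (illustrative only), the matrix above could then be created in a
   single call per process, with m and the nnz arrays set to the per-rank values
   listed above:

.vb
     Mat A;
     MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
.ve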
4271 
4272    Level: intermediate
4273 
4274 .keywords: matrix, aij, compressed row, sparse, parallel
4275 
4276 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4277           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4278 @*/
4279 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4280 {
4281   PetscErrorCode ierr;
4282   PetscMPIInt    size;
4283 
4284   PetscFunctionBegin;
4285   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4286   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4287   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4288   if (size > 1) {
4289     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4290     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4291   } else {
4292     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4293     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4294   }
4295   PetscFunctionReturn(0);
4296 }
4297 
4298 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4299 {
4300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4301   PetscBool      flg;
4302   PetscErrorCode ierr;
4303 
4304   PetscFunctionBegin;
4305   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4306   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4307   if (Ad)     *Ad     = a->A;
4308   if (Ao)     *Ao     = a->B;
4309   if (colmap) *colmap = a->garray;
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4314 {
4315   PetscErrorCode ierr;
4316   PetscInt       m,N,i,rstart,nnz,Ii;
4317   PetscInt       *indx;
4318   PetscScalar    *values;
4319 
4320   PetscFunctionBegin;
4321   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4322   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4323     PetscInt       *dnz,*onz,sum,bs,cbs;
4324 
4325     if (n == PETSC_DECIDE) {
4326       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4327     }
4328     /* Check sum(n) = N */
4329     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4330     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4331 
4332     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4333     rstart -= m;
4334 
4335     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4336     for (i=0; i<m; i++) {
4337       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4338       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4339       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4340     }
4341 
4342     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4343     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4344     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4345     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4346     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4347     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4348     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4349     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4350   }
4351 
4352   /* numeric phase */
4353   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4354   for (i=0; i<m; i++) {
4355     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4356     Ii   = i + rstart;
4357     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4358     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4359   }
4360   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4361   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4362   PetscFunctionReturn(0);
4363 }
4364 
4365 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4366 {
4367   PetscErrorCode    ierr;
4368   PetscMPIInt       rank;
4369   PetscInt          m,N,i,rstart,nnz;
4370   size_t            len;
4371   const PetscInt    *indx;
4372   PetscViewer       out;
4373   char              *name;
4374   Mat               B;
4375   const PetscScalar *values;
4376 
4377   PetscFunctionBegin;
4378   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4379   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4380   /* Should this be the type of the diagonal block of A? */
4381   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4382   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4383   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4384   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4385   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4386   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4387   for (i=0; i<m; i++) {
4388     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4389     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4390     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4391   }
4392   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4393   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4394 
4395   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4396   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4397   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4398   sprintf(name,"%s.%d",outfile,rank);
4399   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4400   ierr = PetscFree(name);CHKERRQ(ierr);
4401   ierr = MatView(B,out);CHKERRQ(ierr);
4402   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4403   ierr = MatDestroy(&B);CHKERRQ(ierr);
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4408 {
4409   PetscErrorCode      ierr;
4410   Mat_Merge_SeqsToMPI *merge;
4411   PetscContainer      container;
4412 
4413   PetscFunctionBegin;
4414   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4415   if (container) {
4416     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4429     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4430     ierr = PetscFree(merge);CHKERRQ(ierr);
4431     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4432   }
4433   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #include <../src/mat/utils/freespace.h>
4438 #include <petscbt.h>
4439 
4440 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4441 {
4442   PetscErrorCode      ierr;
4443   MPI_Comm            comm;
4444   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4445   PetscMPIInt         size,rank,taga,*len_s;
4446   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4447   PetscInt            proc,m;
4448   PetscInt            **buf_ri,**buf_rj;
4449   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4450   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4451   MPI_Request         *s_waits,*r_waits;
4452   MPI_Status          *status;
4453   MatScalar           *aa=a->a;
4454   MatScalar           **abuf_r,*ba_i;
4455   Mat_Merge_SeqsToMPI *merge;
4456   PetscContainer      container;
4457 
4458   PetscFunctionBegin;
4459   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4460   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4461 
4462   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4463   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4464 
4465   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4466   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4467 
4468   bi     = merge->bi;
4469   bj     = merge->bj;
4470   buf_ri = merge->buf_ri;
4471   buf_rj = merge->buf_rj;
4472 
4473   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4474   owners = merge->rowmap->range;
4475   len_s  = merge->len_s;
4476 
4477   /* send and recv matrix values */
4478   /*-----------------------------*/
4479   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4480   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4481 
4482   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4483   for (proc=0,k=0; proc<size; proc++) {
4484     if (!len_s[proc]) continue;
4485     i    = owners[proc];
4486     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4487     k++;
4488   }
4489 
4490   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4491   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4492   ierr = PetscFree(status);CHKERRQ(ierr);
4493 
4494   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4495   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4496 
4497   /* insert mat values of mpimat */
4498   /*----------------------------*/
4499   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4500   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4501 
4502   for (k=0; k<merge->nrecv; k++) {
4503     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4504     nrows       = *(buf_ri_k[k]);
4505     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4507   }
4508 
4509   /* set values of ba */
4510   m = merge->rowmap->n;
4511   for (i=0; i<m; i++) {
4512     arow = owners[rank] + i;
4513     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4514     bnzi = bi[i+1] - bi[i];
4515     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4516 
4517     /* add local non-zero vals of this proc's seqmat into ba */
4518     anzi   = ai[arow+1] - ai[arow];
4519     aj     = a->j + ai[arow];
4520     aa     = a->a + ai[arow];
4521     nextaj = 0;
4522     for (j=0; nextaj<anzi; j++) {
4523       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4524         ba_i[j] += aa[nextaj++];
4525       }
4526     }
4527 
4528     /* add received vals into ba */
4529     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4530       /* i-th row */
4531       if (i == *nextrow[k]) {
4532         anzi   = *(nextai[k]+1) - *nextai[k];
4533         aj     = buf_rj[k] + *(nextai[k]);
4534         aa     = abuf_r[k] + *(nextai[k]);
4535         nextaj = 0;
4536         for (j=0; nextaj<anzi; j++) {
4537           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4538             ba_i[j] += aa[nextaj++];
4539           }
4540         }
4541         nextrow[k]++; nextai[k]++;
4542       }
4543     }
4544     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4545   }
4546   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4547   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4548 
4549   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4550   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4551   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4552   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4553   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4554   PetscFunctionReturn(0);
4555 }
4556 
4557 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4558 {
4559   PetscErrorCode      ierr;
4560   Mat                 B_mpi;
4561   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4562   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4563   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4564   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4565   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4566   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4567   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4568   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4569   MPI_Status          *status;
4570   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4571   PetscBT             lnkbt;
4572   Mat_Merge_SeqsToMPI *merge;
4573   PetscContainer      container;
4574 
4575   PetscFunctionBegin;
4576   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4577 
4578   /* make sure it is a PETSc comm */
4579   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4581   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4582 
4583   ierr = PetscNew(&merge);CHKERRQ(ierr);
4584   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4585 
4586   /* determine row ownership */
4587   /*---------------------------------------------------------*/
4588   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4589   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4590   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4591   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4592   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4593   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4594   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4595 
4596   m      = merge->rowmap->n;
4597   owners = merge->rowmap->range;
4598 
4599   /* determine the number of messages to send, their lengths */
4600   /*---------------------------------------------------------*/
4601   len_s = merge->len_s;
4602 
4603   len          = 0; /* length of buf_si[] */
4604   merge->nsend = 0;
4605   for (proc=0; proc<size; proc++) {
4606     len_si[proc] = 0;
4607     if (proc == rank) {
4608       len_s[proc] = 0;
4609     } else {
4610       len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4612     }
4613     if (len_s[proc]) {
4614       merge->nsend++;
4615       nrows = 0;
4616       for (i=owners[proc]; i<owners[proc+1]; i++) {
4617         if (ai[i+1] > ai[i]) nrows++;
4618       }
4619       len_si[proc] = 2*(nrows+1);
4620       len         += len_si[proc];
4621     }
4622   }
4623 
4624   /* determine the number and length of messages to receive for ij-structure */
4625   /*-------------------------------------------------------------------------*/
4626   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4627   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4628 
4629   /* post the Irecv of j-structure */
4630   /*-------------------------------*/
4631   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4632   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4633 
4634   /* post the Isend of j-structure */
4635   /*--------------------------------*/
4636   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4637 
4638   for (proc=0, k=0; proc<size; proc++) {
4639     if (!len_s[proc]) continue;
4640     i    = owners[proc];
4641     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4642     k++;
4643   }
4644 
4645   /* receives and sends of j-structure are complete */
4646   /*------------------------------------------------*/
4647   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4648   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4649 
4650   /* send and recv i-structure */
4651   /*---------------------------*/
4652   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4653   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4654 
4655   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4656   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4657   for (proc=0,k=0; proc<size; proc++) {
4658     if (!len_s[proc]) continue;
4659     /* form outgoing message for i-structure:
4660          buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the destination process)
4662                [nrows+1:2*nrows+1]: i-structure index
4663     */
4664     /*-------------------------------------------*/
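    /* Illustrative example (not taken from an actual run): if, within the destination's
       row range, only the rows with local offsets 1 and 3 are nonempty, holding 2 and 3
       nonzeros respectively, the outgoing message is
           buf_si = {2, 1, 3, 0, 2, 5}
       i.e. nrows=2, local row indices {1,3}, i-structure offsets {0,2,5}. */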
4665     nrows       = len_si[proc]/2 - 1;
4666     buf_si_i    = buf_si + nrows+1;
4667     buf_si[0]   = nrows;
4668     buf_si_i[0] = 0;
4669     nrows       = 0;
4670     for (i=owners[proc]; i<owners[proc+1]; i++) {
4671       anzi = ai[i+1] - ai[i];
4672       if (anzi) {
4673         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4674         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4675         nrows++;
4676       }
4677     }
4678     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4679     k++;
4680     buf_si += len_si[proc];
4681   }
4682 
4683   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4684   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4685 
4686   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4687   for (i=0; i<merge->nrecv; i++) {
4688     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4689   }
4690 
4691   ierr = PetscFree(len_si);CHKERRQ(ierr);
4692   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4693   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4694   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4695   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4696   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4697   ierr = PetscFree(status);CHKERRQ(ierr);
4698 
4699   /* compute a local seq matrix in each processor */
4700   /*----------------------------------------------*/
4701   /* allocate bi array and free space for accumulating nonzero column info */
4702   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4703   bi[0] = 0;
4704 
4705   /* create and initialize a linked list */
4706   nlnk = N+1;
4707   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4708 
4709   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4710   len  = ai[owners[rank+1]] - ai[owners[rank]];
4711   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4712 
4713   current_space = free_space;
4714 
4715   /* determine symbolic info for each local row */
4716   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4717 
4718   for (k=0; k<merge->nrecv; k++) {
4719     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4720     nrows       = *buf_ri_k[k];
4721     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4723   }
4724 
4725   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4726   len  = 0;
4727   for (i=0; i<m; i++) {
4728     bnzi = 0;
4729     /* add local non-zero cols of this proc's seqmat into lnk */
4730     arow  = owners[rank] + i;
4731     anzi  = ai[arow+1] - ai[arow];
4732     aj    = a->j + ai[arow];
4733     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4734     bnzi += nlnk;
4735     /* add received col data into lnk */
4736     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4737       if (i == *nextrow[k]) { /* i-th row */
4738         anzi  = *(nextai[k]+1) - *nextai[k];
4739         aj    = buf_rj[k] + *nextai[k];
4740         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4741         bnzi += nlnk;
4742         nextrow[k]++; nextai[k]++;
4743       }
4744     }
4745     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4746 
4747     /* if free space is not available, make more free space */
4748     if (current_space->local_remaining<bnzi) {
4749       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4750       nspacedouble++;
4751     }
4752     /* copy data into free space, then initialize lnk */
4753     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4754     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4755 
4756     current_space->array           += bnzi;
4757     current_space->local_used      += bnzi;
4758     current_space->local_remaining -= bnzi;
4759 
4760     bi[i+1] = bi[i] + bnzi;
4761   }
4762 
4763   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4764 
4765   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4766   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4767   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4768 
4769   /* create symbolic parallel matrix B_mpi */
4770   /*---------------------------------------*/
4771   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4772   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4773   if (n==PETSC_DECIDE) {
4774     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4775   } else {
4776     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4777   }
4778   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4779   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4780   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4781   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4782   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4783 
4784   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4785   B_mpi->assembled    = PETSC_FALSE;
4786   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4787   merge->bi           = bi;
4788   merge->bj           = bj;
4789   merge->buf_ri       = buf_ri;
4790   merge->buf_rj       = buf_rj;
4791   merge->coi          = NULL;
4792   merge->coj          = NULL;
4793   merge->owners_co    = NULL;
4794 
4795   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4796 
4797   /* attach the supporting struct to B_mpi for reuse */
4798   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4799   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4800   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4801   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4802   *mpimat = B_mpi;
4803 
4804   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4805   PetscFunctionReturn(0);
4806 }
4807 
4808 /*@C
4809       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4810                  matrices from each processor
4811 
4812     Collective on MPI_Comm
4813 
4814    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4817 .    m - number of local rows (or PETSC_DECIDE)
4818 .    n - number of local columns (or PETSC_DECIDE)
4819 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4820 
4821    Output Parameter:
4822 .    mpimat - the parallel matrix generated
4823 
4824     Level: advanced
4825 
4826    Notes:
4827      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
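
   A minimal usage sketch (assuming each process has already assembled a SeqAIJ
   matrix seqmat with identical dimensions) might be:

.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve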
4830 @*/
4831 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4832 {
4833   PetscErrorCode ierr;
4834   PetscMPIInt    size;
4835 
4836   PetscFunctionBegin;
4837   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4838   if (size == 1) {
4839     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4840     if (scall == MAT_INITIAL_MATRIX) {
4841       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4842     } else {
4843       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4844     }
4845     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4846     PetscFunctionReturn(0);
4847   }
4848   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4849   if (scall == MAT_INITIAL_MATRIX) {
4850     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4851   }
4852   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4853   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4854   PetscFunctionReturn(0);
4855 }
4856 
4857 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize().
4861 
4862     Not Collective
4863 
4864    Input Parameters:
4865 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4867 
4868    Output Parameter:
4869 .    A_loc - the local sequential matrix generated
4870 
4871     Level: developer
4872 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4874 
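   A minimal sketch (illustrative; with MAT_INITIAL_MATRIX the caller is assumed to
   destroy the returned matrix when done):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ... use A_loc ...
     MatDestroy(&A_loc);
.ve
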
4875 @*/
4876 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4877 {
4878   PetscErrorCode ierr;
4879   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4880   Mat_SeqAIJ     *mat,*a,*b;
4881   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4882   MatScalar      *aa,*ba,*cam;
4883   PetscScalar    *ca;
4884   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4885   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4886   PetscBool      match;
4887   MPI_Comm       comm;
4888   PetscMPIInt    size;
4889 
4890   PetscFunctionBegin;
4891   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4892   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4893   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4894   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4895   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4896 
4897   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4898   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4899   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4900   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4901   aa = a->a; ba = b->a;
4902   if (scall == MAT_INITIAL_MATRIX) {
4903     if (size == 1) {
4904       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4905       PetscFunctionReturn(0);
4906     }
4907 
4908     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4909     ci[0] = 0;
4910     for (i=0; i<am; i++) {
4911       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4912     }
4913     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4914     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4915     k    = 0;
4916     for (i=0; i<am; i++) {
4917       ncols_o = bi[i+1] - bi[i];
4918       ncols_d = ai[i+1] - ai[i];
4919       /* off-diagonal portion of A */
4920       for (jo=0; jo<ncols_o; jo++) {
4921         col = cmap[*bj];
4922         if (col >= cstart) break;
4923         cj[k]   = col; bj++;
4924         ca[k++] = *ba++;
4925       }
4926       /* diagonal portion of A */
4927       for (j=0; j<ncols_d; j++) {
4928         cj[k]   = cstart + *aj++;
4929         ca[k++] = *aa++;
4930       }
4931       /* off-diagonal portion of A */
4932       for (j=jo; j<ncols_o; j++) {
4933         cj[k]   = cmap[*bj++];
4934         ca[k++] = *ba++;
4935       }
4936     }
4937     /* put together the new matrix */
4938     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4939     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4940     /* Since these are PETSc arrays, change flags to free them as necessary. */
4941     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4942     mat->free_a  = PETSC_TRUE;
4943     mat->free_ij = PETSC_TRUE;
4944     mat->nonew   = 0;
4945   } else if (scall == MAT_REUSE_MATRIX) {
4946     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4947     ci = mat->i; cj = mat->j; cam = mat->a;
4948     for (i=0; i<am; i++) {
4949       /* off-diagonal portion of A */
4950       ncols_o = bi[i+1] - bi[i];
4951       for (jo=0; jo<ncols_o; jo++) {
4952         col = cmap[*bj];
4953         if (col >= cstart) break;
4954         *cam++ = *ba++; bj++;
4955       }
4956       /* diagonal portion of A */
4957       ncols_d = ai[i+1] - ai[i];
4958       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4959       /* off-diagonal portion of A */
4960       for (j=jo; j<ncols_o; j++) {
4961         *cam++ = *ba++; bj++;
4962       }
4963     }
4964   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4965   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4966   PetscFunctionReturn(0);
4967 }
4968 
4969 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4971 
4972     Not Collective
4973 
4974    Input Parameters:
4975 +    A - the matrix
4976 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4977 -    row, col - index sets of rows and columns to extract (or NULL)
4978 
4979    Output Parameter:
4980 .    A_loc - the local sequential matrix generated
4981 
4982     Level: developer
4983 
4984 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4985 
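   A minimal sketch (illustrative), letting the routine determine the local rows and
   nonzero columns itself:

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
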
4986 @*/
4987 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4988 {
4989   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4990   PetscErrorCode ierr;
4991   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4992   IS             isrowa,iscola;
4993   Mat            *aloc;
4994   PetscBool      match;
4995 
4996   PetscFunctionBegin;
4997   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4998   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4999   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5000   if (!row) {
5001     start = A->rmap->rstart; end = A->rmap->rend;
5002     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5003   } else {
5004     isrowa = *row;
5005   }
5006   if (!col) {
5007     start = A->cmap->rstart;
5008     cmap  = a->garray;
5009     nzA   = a->A->cmap->n;
5010     nzB   = a->B->cmap->n;
5011     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5012     ncols = 0;
5013     for (i=0; i<nzB; i++) {
5014       if (cmap[i] < start) idx[ncols++] = cmap[i];
5015       else break;
5016     }
5017     imark = i;
5018     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5019     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5020     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5021   } else {
5022     iscola = *col;
5023   }
5024   if (scall != MAT_INITIAL_MATRIX) {
5025     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5026     aloc[0] = *A_loc;
5027   }
5028   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5029   *A_loc = aloc[0];
5030   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5031   if (!row) {
5032     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5033   }
5034   if (!col) {
5035     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5036   }
5037   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5038   PetscFunctionReturn(0);
5039 }
5040 
5041 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5043 
5044     Collective on Mat
5045 
5046    Input Parameters:
5047 +    A,B - the matrices in mpiaij format
5048 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5049 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5050 
5051    Output Parameter:
5052 +    rowb, colb - index sets of rows and columns of B to extract
5053 -    B_seq - the sequential matrix generated
5054 
5055     Level: developer
5056 
5057 @*/
5058 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5059 {
5060   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5061   PetscErrorCode ierr;
5062   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5063   IS             isrowb,iscolb;
5064   Mat            *bseq=NULL;
5065 
5066   PetscFunctionBegin;
5067   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5068     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5069   }
5070   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5071 
5072   if (scall == MAT_INITIAL_MATRIX) {
5073     start = A->cmap->rstart;
5074     cmap  = a->garray;
5075     nzA   = a->A->cmap->n;
5076     nzB   = a->B->cmap->n;
5077     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5078     ncols = 0;
5079     for (i=0; i<nzB; i++) {  /* row < local row index */
5080       if (cmap[i] < start) idx[ncols++] = cmap[i];
5081       else break;
5082     }
5083     imark = i;
5084     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5085     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5086     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5087     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5088   } else {
5089     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5090     isrowb  = *rowb; iscolb = *colb;
5091     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5092     bseq[0] = *B_seq;
5093   }
5094   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5095   *B_seq = bseq[0];
5096   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5097   if (!rowb) {
5098     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5099   } else {
5100     *rowb = isrowb;
5101   }
5102   if (!colb) {
5103     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5104   } else {
5105     *colb = iscolb;
5106   }
5107   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5108   PetscFunctionReturn(0);
5109 }
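
/*
   A minimal usage sketch for MatGetBrowsOfAcols() (illustration only, not compiled as part
   of this file): A and B are assumed to be assembled MATMPIAIJ matrices with B's row layout
   matching A's column layout.  The index sets and the sequential matrix produced by the
   first call are reused on later calls and must be destroyed by the caller.
*/
#if 0
static PetscErrorCode ExampleGetBrowsOfAcols(Mat A,Mat B)
{
  PetscErrorCode ierr;
  IS             rowb = NULL,colb = NULL;
  Mat            B_seq = NULL;

  PetscFunctionBegin;
  ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
  /* ... change numerical values of B, keeping its nonzero pattern ... */
  ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
  ierr = ISDestroy(&rowb);CHKERRQ(ierr);
  ierr = ISDestroy(&colb);CHKERRQ(ierr);
  ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif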
5110 
5111 /*
5112     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5113     of the OFF-DIAGONAL portion of the local part of A
5114 
5115     Collective on Mat
5116 
5117    Input Parameters:
5118 +    A,B - the matrices in mpiaij format
5119 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5120 
5121    Output Parameters:
5122 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5123 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5124 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5125 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5126 
5127     Level: developer
5128 
5129 */
5130 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5131 {
5132   VecScatter_MPI_General *gen_to,*gen_from;
5133   PetscErrorCode         ierr;
5134   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5135   Mat_SeqAIJ             *b_oth;
5136   VecScatter             ctx;
5137   MPI_Comm               comm;
5138   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5139   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5140   PetscInt               *rvalues,*svalues;
5141   MatScalar              *b_otha,*bufa,*bufA;
5142   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5143   MPI_Request            *rwaits = NULL,*swaits = NULL;
5144   MPI_Status             *sstatus,rstatus;
5145   PetscMPIInt            jj,size;
5146   PetscInt               *cols,sbs,rbs;
5147   PetscScalar            *vals;
5148 
5149   PetscFunctionBegin;
5150   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5151   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5152 
5153   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5154     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5155   }
5156   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5158 
5159   if (size == 1) {
5160     startsj_s = NULL;
5161     bufa_ptr  = NULL;
5162     *B_oth    = NULL;
5163     PetscFunctionReturn(0);
5164   }
5165 
5166   if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */
5167     a->Mvctx_mpi1_flg = PETSC_TRUE;
5168     ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5169   }
5170   ctx = a->Mvctx_mpi1;
5171   tag = ((PetscObject)ctx)->tag;
5172 
5173   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5174   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5175   nrecvs   = gen_from->n;
5176   nsends   = gen_to->n;
5177 
5178   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5179   srow    = gen_to->indices;    /* local row index to be sent */
5180   sstarts = gen_to->starts;
5181   sprocs  = gen_to->procs;
5182   sstatus = gen_to->sstatus;
5183   sbs     = gen_to->bs;
5184   rstarts = gen_from->starts;
5185   rprocs  = gen_from->procs;
5186   rbs     = gen_from->bs;
5187 
5188   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5189   if (scall == MAT_INITIAL_MATRIX) {
5190     /* i-array */
5191     /*---------*/
5192     /*  post receives */
5193     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5194     for (i=0; i<nrecvs; i++) {
5195       rowlen = rvalues + rstarts[i]*rbs;
5196       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5197       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5198     }
5199 
5200     /* pack the outgoing message */
5201     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5202 
5203     sstartsj[0] = 0;
5204     rstartsj[0] = 0;
5205     len         = 0; /* total length of j or a array to be sent */
5206     k           = 0;
5207     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5208     for (i=0; i<nsends; i++) {
5209       rowlen = svalues + sstarts[i]*sbs;
5210       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5211       for (j=0; j<nrows; j++) {
5212         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5213         for (l=0; l<sbs; l++) {
5214           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5215 
5216           rowlen[j*sbs+l] = ncols;
5217 
5218           len += ncols;
5219           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5220         }
5221         k++;
5222       }
5223       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5224 
5225       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5226     }
5227     /* recvs and sends of i-array are completed */
5228     i = nrecvs;
5229     while (i--) {
5230       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5231     }
5232     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5233     ierr = PetscFree(svalues);CHKERRQ(ierr);
5234 
5235     /* allocate buffers for sending j and a arrays */
5236     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5237     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5238 
5239     /* create i-array of B_oth */
5240     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5241 
5242     b_othi[0] = 0;
5243     len       = 0; /* total length of j or a array to be received */
5244     k         = 0;
5245     for (i=0; i<nrecvs; i++) {
5246       rowlen = rvalues + rstarts[i]*rbs;
5247       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5248       for (j=0; j<nrows; j++) {
5249         b_othi[k+1] = b_othi[k] + rowlen[j];
5250         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5251         k++;
5252       }
5253       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5254     }
5255     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5256 
5257     /* allocate space for j and a arrays of B_oth */
5258     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5259     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5260 
5261     /* j-array */
5262     /*---------*/
5263     /*  post receives of j-array */
5264     for (i=0; i<nrecvs; i++) {
5265       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5266       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5267     }
5268 
5269     /* pack the outgoing message j-array */
5270     k = 0;
5271     for (i=0; i<nsends; i++) {
5272       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5273       bufJ  = bufj+sstartsj[i];
5274       for (j=0; j<nrows; j++) {
5275         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5276         for (ll=0; ll<sbs; ll++) {
5277           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5278           for (l=0; l<ncols; l++) {
5279             *bufJ++ = cols[l];
5280           }
5281           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5282         }
5283       }
5284       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5285     }
5286 
5287     /* recvs and sends of j-array are completed */
5288     i = nrecvs;
5289     while (i--) {
5290       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5291     }
5292     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5293   } else if (scall == MAT_REUSE_MATRIX) {
5294     sstartsj = *startsj_s;
5295     rstartsj = *startsj_r;
5296     bufa     = *bufa_ptr;
5297     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5298     b_otha   = b_oth->a;
5299   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5300 
5301   /* a-array */
5302   /*---------*/
5303   /*  post receives of a-array */
5304   for (i=0; i<nrecvs; i++) {
5305     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5306     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5307   }
5308 
5309   /* pack the outgoing message a-array */
5310   k = 0;
5311   for (i=0; i<nsends; i++) {
5312     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5313     bufA  = bufa+sstartsj[i];
5314     for (j=0; j<nrows; j++) {
5315       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5316       for (ll=0; ll<sbs; ll++) {
5317         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5318         for (l=0; l<ncols; l++) {
5319           *bufA++ = vals[l];
5320         }
5321         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5322       }
5323     }
5324     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5325   }
5326   /* recvs and sends of a-array are completed */
5327   i = nrecvs;
5328   while (i--) {
5329     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5330   }
5331   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5332   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5333 
5334   if (scall == MAT_INITIAL_MATRIX) {
5335     /* put together the new matrix */
5336     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5337 
5338     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5339     /* Since these are PETSc arrays, change flags to free them as necessary. */
5340     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5341     b_oth->free_a  = PETSC_TRUE;
5342     b_oth->free_ij = PETSC_TRUE;
5343     b_oth->nonew   = 0;
5344 
5345     ierr = PetscFree(bufj);CHKERRQ(ierr);
5346     if (!startsj_s || !bufa_ptr) {
5347       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5348       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the caller did not ask for the send buffer back, so release it here */
5349     } else {
5350       *startsj_s = sstartsj;
5351       *startsj_r = rstartsj;
5352       *bufa_ptr  = bufa;
5353     }
5354   }
5355   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5356   PetscFunctionReturn(0);
5357 }
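
/*
   A minimal reuse sketch for MatGetBrowsOfAoCols_MPIAIJ() (illustration only, not compiled
   as part of this file): the offset arrays and send buffer created by the first call are
   handed back on later calls so that only the matrix values are exchanged.  The caller owns
   B_oth, the two offset arrays (allocated together with PetscMalloc2()) and the value buffer.
*/
#if 0
static PetscErrorCode ExampleGetBrowsOfAoCols(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscInt       *startsj_s = NULL,*startsj_r = NULL;
  MatScalar      *bufa = NULL;
  Mat            B_oth = NULL;

  PetscFunctionBegin;
  ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
  /* ... update the numerical values of B without changing its nonzero pattern ... */
  ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
  ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
  ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
  ierr = PetscFree(bufa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif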
5358 
5359 /*@C
5360   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5361 
5362   Not Collective
5363 
5364   Input Parameter:
5365 . A - The matrix in mpiaij format
5366 
5367   Output Parameters:
5368 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5369 . colmap - A map from global column index to local index into lvec
5370 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5371 
5372   Level: developer
5373 
5374 @*/
5375 #if defined(PETSC_USE_CTABLE)
5376 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5377 #else
5378 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5379 #endif
5380 {
5381   Mat_MPIAIJ *a;
5382 
5383   PetscFunctionBegin;
5384   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5385   PetscValidPointer(lvec, 2);
5386   PetscValidPointer(colmap, 3);
5387   PetscValidPointer(multScatter, 4);
5388   a = (Mat_MPIAIJ*) A->data;
5389   if (lvec) *lvec = a->lvec;
5390   if (colmap) *colmap = a->colmap;
5391   if (multScatter) *multScatter = a->Mvctx;
5392   PetscFunctionReturn(0);
5393 }
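
/*
   A minimal usage sketch for MatGetCommunicationStructs() (illustration only, not compiled
   as part of this file): A is assumed to be an assembled MATMPIAIJ matrix.  The returned
   objects are owned by the matrix and must not be destroyed by the caller.
*/
#if 0
static PetscErrorCode ExampleGetCommunicationStructs(Mat A)
{
  PetscErrorCode ierr;
  Vec            lvec;
  VecScatter     Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable     colmap;
#else
  PetscInt       *colmap;
#endif

  PetscFunctionBegin;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
  /* during MatMult(A,x,y) the scatter Mvctx gathers the needed off-process entries of x into lvec */
  PetscFunctionReturn(0);
}
#endif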
5394 
5395 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5396 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5397 #if defined(PETSC_HAVE_MKL_SPARSE)
5398 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5399 #endif
5400 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5401 #if defined(PETSC_HAVE_ELEMENTAL)
5402 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5403 #endif
5404 #if defined(PETSC_HAVE_HYPRE)
5405 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5406 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5407 #endif
5408 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5409 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5410 
5411 /*
5412     Computes C = (B'*A')' = A*B since computing A*B directly is untenable
5413 
5414                n                       p                          p
5415         (              )       (              )         (                  )
5416       m (      A       )  *  n (       B      )   =   m (         C        )
5417         (              )       (              )         (                  )
5418 
5419 */
5420 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5421 {
5422   PetscErrorCode ierr;
5423   Mat            At,Bt,Ct;
5424 
5425   PetscFunctionBegin;
5426   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5427   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5428   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5429   ierr = MatDestroy(&At);CHKERRQ(ierr);
5430   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5431   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5432   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5433   PetscFunctionReturn(0);
5434 }
5435 
5436 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5437 {
5438   PetscErrorCode ierr;
5439   PetscInt       m=A->rmap->n,n=B->cmap->n;
5440   Mat            Cmat;
5441 
5442   PetscFunctionBegin;
5443   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5444   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5445   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5446   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5447   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5448   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5449   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5450   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5451 
5452   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5453 
5454   *C = Cmat;
5455   PetscFunctionReturn(0);
5456 }
5457 
5458 /* ----------------------------------------------------------------*/
5459 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5460 {
5461   PetscErrorCode ierr;
5462 
5463   PetscFunctionBegin;
5464   if (scall == MAT_INITIAL_MATRIX) {
5465     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5466     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5467     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5468   }
5469   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5470   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5471   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5472   PetscFunctionReturn(0);
5473 }
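
/*
   A usage sketch (illustration only, not compiled as part of this file): the routines above
   are reached through MatMatMult() when A is MATMPIDENSE and B is MATMPIAIJ; the symbolic
   product can be reused when only the numerical values of A or B change.
*/
#if 0
static PetscErrorCode ExampleDenseTimesAIJ(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  /* ... change values of A and/or B, keeping their sizes and nonzero structure ... */
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif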
5474 
5475 /*MC
5476    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5477 
5478    Options Database Keys:
5479 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5480 
5481   Level: beginner
5482 
5483 .seealso: MatCreateAIJ()
5484 M*/
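
/*
   A minimal creation sketch for an "mpiaij" matrix (illustration only, not compiled as part
   of this file); the local sizes m, n and the preallocation counts 5 and 2 are placeholders.
*/
#if 0
static PetscErrorCode ExampleCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr); /* rough diagonal/off-diagonal nonzeros per row */
  /* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual ... */
  PetscFunctionReturn(0);
}
#endif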
5485 
5486 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5487 {
5488   Mat_MPIAIJ     *b;
5489   PetscErrorCode ierr;
5490   PetscMPIInt    size;
5491 
5492   PetscFunctionBegin;
5493   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5494 
5495   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5496   B->data       = (void*)b;
5497   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5498   B->assembled  = PETSC_FALSE;
5499   B->insertmode = NOT_SET_VALUES;
5500   b->size       = size;
5501 
5502   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5503 
5504   /* build cache for off array entries formed */
5505   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5506 
5507   b->donotstash  = PETSC_FALSE;
5508   b->colmap      = 0;
5509   b->garray      = 0;
5510   b->roworiented = PETSC_TRUE;
5511 
5512   /* stuff used for matrix vector multiply */
5513   b->lvec  = NULL;
5514   b->Mvctx = NULL;
5515 
5516   /* stuff for MatGetRow() */
5517   b->rowindices   = 0;
5518   b->rowvalues    = 0;
5519   b->getrowactive = PETSC_FALSE;
5520 
5521   /* flexible pointer used in CUSP/CUSPARSE classes */
5522   b->spptr = NULL;
5523 
5524   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5533 #if defined(PETSC_HAVE_MKL_SPARSE)
5534   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5535 #endif
5536   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5538 #if defined(PETSC_HAVE_ELEMENTAL)
5539   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5540 #endif
5541 #if defined(PETSC_HAVE_HYPRE)
5542   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5543 #endif
5544   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5549 #if defined(PETSC_HAVE_HYPRE)
5550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5551 #endif
5552   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5553   PetscFunctionReturn(0);
5554 }
5555 
5556 /*@C
5557      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5558          and "off-diagonal" part of the matrix in CSR format.
5559 
5560    Collective on MPI_Comm
5561 
5562    Input Parameters:
5563 +  comm - MPI communicator
5564 .  m - number of local rows (cannot be PETSC_DECIDE)
5565 .  n - This value should be the same as the local size used in creating the
5566        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5567        calculated if N is given). For square matrices n is almost always m.
5568 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5569 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5570 .   i - row indices for "diagonal" portion of matrix
5571 .   j - column indices
5572 .   a - matrix values
5573 .   oi - row indices for "off-diagonal" portion of matrix
5574 .   oj - column indices
5575 -   oa - matrix values
5576 
5577    Output Parameter:
5578 .   mat - the matrix
5579 
5580    Level: advanced
5581 
5582    Notes:
5583        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5584        must free the arrays once the matrix has been destroyed and not before.
5585 
5586        The i and j indices are 0 based
5587        The i and j indices are 0-based
5588        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5589 
5590        This sets local rows and cannot be used to set off-processor values.
5591 
5592        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5593        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5594        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5595        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5596        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5597        communication if it is known that only local entries will be set.
5598 
5599 .keywords: matrix, aij, compressed row, sparse, parallel
5600 
5601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5602           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5603 @*/
5604 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5605 {
5606   PetscErrorCode ierr;
5607   Mat_MPIAIJ     *maij;
5608 
5609   PetscFunctionBegin;
5610   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5611   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5612   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5613   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5614   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5615   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5616   maij = (Mat_MPIAIJ*) (*mat)->data;
5617 
5618   (*mat)->preallocated = PETSC_TRUE;
5619 
5620   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5621   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5622 
5623   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5624   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5625 
5626   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630 
5631   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5632   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5633   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5634   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5635   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5636   PetscFunctionReturn(0);
5637 }
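
/*
   A minimal sketch for MatCreateMPIAIJWithSplitArrays() (illustration only, not compiled as
   part of this file): on exactly two processes, each rank owns one row of the global 2x2
   matrix [2 -1; -1 2].  The "diagonal" CSR uses local column indices, the "off-diagonal"
   CSR uses global column indices, and the arrays are static here because the routine does
   not copy them.
*/
#if 0
static PetscErrorCode ExampleSplitArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode     ierr;
  PetscMPIInt        rank;
  static PetscInt    i[]  = {0,1},j[]   = {0};   /* diagonal block: one entry in local column 0 */
  static PetscScalar a[]  = {2.0};
  static PetscInt    oi[] = {0,1},oj[1];         /* off-diagonal block: one entry per row */
  static PetscScalar oa[] = {-1.0};

  PetscFunctionBegin;
  ierr  = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  oj[0] = rank ? 0 : 1;                          /* global column of the off-diagonal entry */
  ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif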
5638 
5639 /*
5640     Special version for direct calls from Fortran
5641 */
5642 #include <petsc/private/fortranimpl.h>
5643 
5644 /* Change these macros so they can be used in a void function */
5645 #undef CHKERRQ
5646 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5647 #undef SETERRQ2
5648 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5649 #undef SETERRQ3
5650 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5651 #undef SETERRQ
5652 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5653 
5654 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5655 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5656 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5657 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5658 #else
5659 #endif
5660 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5661 {
5662   Mat            mat  = *mmat;
5663   PetscInt       m    = *mm, n = *mn;
5664   InsertMode     addv = *maddv;
5665   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5666   PetscScalar    value;
5667   PetscErrorCode ierr;
5668 
5669   MatCheckPreallocated(mat,1);
5670   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5671 
5672 #if defined(PETSC_USE_DEBUG)
5673   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5674 #endif
5675   {
5676     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5677     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5678     PetscBool roworiented = aij->roworiented;
5679 
5680     /* Some Variables required in the macro */
5681     Mat        A                 = aij->A;
5682     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5683     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5684     MatScalar  *aa               = a->a;
5685     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5686     Mat        B                 = aij->B;
5687     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5688     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5689     MatScalar  *ba               = b->a;
5690 
5691     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5692     PetscInt  nonew = a->nonew;
5693     MatScalar *ap1,*ap2;
5694 
5695     PetscFunctionBegin;
5696     for (i=0; i<m; i++) {
5697       if (im[i] < 0) continue;
5698 #if defined(PETSC_USE_DEBUG)
5699       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5700 #endif
5701       if (im[i] >= rstart && im[i] < rend) {
5702         row      = im[i] - rstart;
5703         lastcol1 = -1;
5704         rp1      = aj + ai[row];
5705         ap1      = aa + ai[row];
5706         rmax1    = aimax[row];
5707         nrow1    = ailen[row];
5708         low1     = 0;
5709         high1    = nrow1;
5710         lastcol2 = -1;
5711         rp2      = bj + bi[row];
5712         ap2      = ba + bi[row];
5713         rmax2    = bimax[row];
5714         nrow2    = bilen[row];
5715         low2     = 0;
5716         high2    = nrow2;
5717 
5718         for (j=0; j<n; j++) {
5719           if (roworiented) value = v[i*n+j];
5720           else value = v[i+j*m];
5721           if (in[j] >= cstart && in[j] < cend) {
5722             col = in[j] - cstart;
5723             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5724             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5725           } else if (in[j] < 0) continue;
5726 #if defined(PETSC_USE_DEBUG)
5727           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5728 #endif
5729           else {
5730             if (mat->was_assembled) {
5731               if (!aij->colmap) {
5732                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5733               }
5734 #if defined(PETSC_USE_CTABLE)
5735               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5736               col--;
5737 #else
5738               col = aij->colmap[in[j]] - 1;
5739 #endif
5740               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5741               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5742                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5743                 col  =  in[j];
5744                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5745                 B     = aij->B;
5746                 b     = (Mat_SeqAIJ*)B->data;
5747                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5748                 rp2   = bj + bi[row];
5749                 ap2   = ba + bi[row];
5750                 rmax2 = bimax[row];
5751                 nrow2 = bilen[row];
5752                 low2  = 0;
5753                 high2 = nrow2;
5754                 bm    = aij->B->rmap->n;
5755                 ba    = b->a;
5756               }
5757             } else col = in[j];
5758             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5759           }
5760         }
5761       } else if (!aij->donotstash) {
5762         if (roworiented) {
5763           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5764         } else {
5765           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5766         }
5767       }
5768     }
5769   }
5770   PetscFunctionReturnVoid();
5771 }
5772 
5773