1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
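   Example Usage (a minimal sketch; comm, M, N, nz, dnz and onz are placeholder values, and
   error checking of the return codes is omitted):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,nz,NULL);
     MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);
.ve
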
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The AIJ type also automatically
22    switches over to use inodes when enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
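   Example Usage (a sketch; comm, M, N, nz, dnz and onz are placeholders; the type may also be
   chosen at run time through the options database):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,nz,NULL);
     MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);
.ve
   and run with -mat_type aijcrl.
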
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
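  /* first pass: count the local rows that carry no nonzero value (no stored entries, or all
     stored values equal to zero); if no process has such a row, return a NULL IS, otherwise a
     second pass collects the global indices of the rows that are kept */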
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
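  /* the diagonal lies entirely in the local diagonal block A only when the row and column
     ownership ranges coincide; otherwise fall back to the generic implementation */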
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
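  /* accumulate each column's contribution (|a_ij|, |a_ij|^2 or the maximum, depending on the
     norm type) into a work array of global length, then combine the per-process results with a
     single Allreduce */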
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
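/*
   A calling sketch (pmat, gmat and m are placeholder names; gmat is the sequential matrix whose
   values are taken from rank 0, and m is the number of local rows wanted on this rank):

     Mat pmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&pmat);CHKERRQ(ierr);
     ....  change the numerical values of gmat on rank 0 ....
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&pmat);CHKERRQ(ierr);
*/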
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the number of diagonal and off-diagonal entries in each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the number of diagonal and off-diagonal entries in each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash-table lookup cost; without it, it is not scalable (each processor
403 holds an order-N integer array) but access is fast.
404 */
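/*
   Lookup sketch (mirrors the pattern used in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below; gcol and lcol are placeholder names for a global and a local column index):

#if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif

   lcol < 0 indicates that gcol does not occur in the off-diagonal part on this process.
*/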
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
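/*
   The two macros below insert a single (row,col,value) entry into the local diagonal block A
   (MatSetValues_SeqAIJ_A_Private) or the off-diagonal block B (MatSetValues_SeqAIJ_B_Private):
   a short binary search followed by a linear scan locates the column within the row; an existing
   entry is added to or overwritten depending on addv; otherwise, subject to the nonew and
   ignorezeroentries settings, the row is reallocated if needed and later entries are shifted up
   to make room for the new one.
*/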
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
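  /* v is assumed to contain the entire row in global column order: the entries left of the
     diagonal block, then the diagonal (A) block entries, then the entries right of the
     diagonal block */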
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
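    /* drain the stash: each received message holds (row,col,value) triples; runs of consecutive
       entries with the same row index are assembled with a single MatSetValues_MPIAIJ() call */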
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselves, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
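  /* zeroing columns requires each process to know which of its off-diagonal columns correspond
     to rows zeroed on other processes; this is communicated by scattering a 0/1 mask vector
     (lmask) with the same VecScatter used for MatMult() */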
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
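  /* overlap communication with computation: start scattering the needed off-process entries of
     xx, multiply by the local diagonal block, finish the scatter, then add the off-diagonal
     block contribution */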
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added into yy until the next line */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This requires a call to MatCreateSubMatrices(). */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1042 {
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054 
1055   PetscFunctionBegin;
1056   /* do nondiagonal part */
1057   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1058   /* send it on its way */
1059   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   /* do local part */
1061   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   /* receive remote parts */
1063   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1095 {
1096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1105   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_CTABLE)
1108   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1109 #else
1110   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1114   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1115   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1116   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1117   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1118   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1119 
1120   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1129 #if defined(PETSC_HAVE_ELEMENTAL)
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1131 #endif
1132 #if defined(PETSC_HAVE_HYPRE)
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1135 #endif
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1143   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1144   PetscErrorCode ierr;
1145   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1146   int            fd;
1147   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1148   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1149   PetscScalar    *column_values;
1150   PetscInt       message_count,flowcontrolcount;
1151   FILE           *file;
1152 
1153   PetscFunctionBegin;
1154   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1155   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1156   nz   = A->nz + B->nz;
1157   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1158   if (!rank) {
1159     header[0] = MAT_FILE_CLASSID;
1160     header[1] = mat->rmap->N;
1161     header[2] = mat->cmap->N;
1162 
1163     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1164     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1165     /* get largest number of rows any processor has */
1166     rlen  = mat->rmap->n;
1167     range = mat->rmap->range;
1168     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1169   } else {
1170     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171     rlen = mat->rmap->n;
1172   }
1173 
1174   /* load up the local row counts */
1175   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1176   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1177 
1178   /* store the row lengths to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1184       rlen = range[i+1] - range[i];
1185       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1191     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1192     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1195 
1196   /* load up the local column indices */
1197   nzmax = nz; /* the root process needs as much buffer space as the largest number of nonzeros on any process */
1198   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1199   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1200   cnt   = 0;
1201   for (i=0; i<mat->rmap->n; i++) {
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ((col = garray[B->j[j]]) > cstart) break;
1204       column_indices[cnt++] = col;
1205     }
1206     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1207     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1208   }
1209   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1210 
1211   /* store the column indices to the file */
1212   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1213   if (!rank) {
1214     MPI_Status status;
1215     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1216     for (i=1; i<size; i++) {
1217       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1218       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1219       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1220       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the local column values */
1233   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<mat->rmap->n; i++) {
1236     for (j=B->i[i]; j<B->i[i+1]; j++) {
1237       if (garray[B->j[j]] > cstart) break;
1238       column_values[cnt++] = B->a[j];
1239     }
1240     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1241     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1242   }
1243   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1244 
1245   /* store the column values to the file */
1246   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1247   if (!rank) {
1248     MPI_Status status;
1249     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1250     for (i=1; i<size; i++) {
1251       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1252       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1253       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1254       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1255       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1256     }
1257     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1258   } else {
1259     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1260     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1263   }
1264   ierr = PetscFree(column_values);CHKERRQ(ierr);
1265 
1266   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1267   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1268   PetscFunctionReturn(0);
1269 }
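/* A minimal round-trip sketch: the binary stream written above is the format read back by MatLoad()
   (cf. MatLoad_MPIAIJ in the operations table below); the file name is chosen for illustration only.
     PetscViewer v;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(A,v);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);
*/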
1270 
1271 #include <petscdraw.h>
1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1273 {
1274   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1275   PetscErrorCode    ierr;
1276   PetscMPIInt       rank = aij->rank,size = aij->size;
1277   PetscBool         isdraw,iascii,isbinary;
1278   PetscViewer       sviewer;
1279   PetscViewerFormat format;
1280 
1281   PetscFunctionBegin;
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1283   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1284   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1285   if (iascii) {
1286     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1287     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1288       MatInfo   info;
1289       PetscBool inodes;
1290 
1291       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1292       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1295       if (!inodes) {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1298       } else {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       }
1302       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1306       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1309       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1312       PetscInt inodecount,inodelimit,*inodes;
1313       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1314       if (inodes) {
1315         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1316       } else {
1317         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1318       }
1319       PetscFunctionReturn(0);
1320     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1321       PetscFunctionReturn(0);
1322     }
1323   } else if (isbinary) {
1324     if (size == 1) {
1325       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1326       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1327     } else {
1328       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1329     }
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1336     if (isnull) PetscFunctionReturn(0);
1337   }
1338 
1339   {
1340     /* assemble the entire matrix onto the first processor */
1341     Mat        A;
1342     Mat_SeqAIJ *Aloc;
1343     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1344     MatScalar  *a;
1345 
1346     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1347     if (!rank) {
1348       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1351     }
1352     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1353     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1354     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1355     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1356     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1357 
1358     /* copy over the A part */
1359     Aloc = (Mat_SeqAIJ*)aij->A->data;
1360     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1361     row  = mat->rmap->rstart;
1362     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1363     for (i=0; i<m; i++) {
1364       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1365       row++;
1366       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1367     }
1368     aj = Aloc->j;
1369     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1370 
1371     /* copy over the B part */
1372     Aloc = (Mat_SeqAIJ*)aij->B->data;
1373     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1374     row  = mat->rmap->rstart;
1375     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1376     ct   = cols;
1377     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1378     for (i=0; i<m; i++) {
1379       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1380       row++;
1381       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1382     }
1383     ierr = PetscFree(ct);CHKERRQ(ierr);
1384     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1385     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1386     /*
1387        Every process has to participate in drawing the matrix since the graphics waits are
1388        synchronized across all processes that share the PetscDraw object
1389     */
1390     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1391     if (!rank) {
1392       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1393       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1394     }
1395     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1396     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1397     ierr = MatDestroy(&A);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1403 {
1404   PetscErrorCode ierr;
1405   PetscBool      iascii,isdraw,issocket,isbinary;
1406 
1407   PetscFunctionBegin;
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1410   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1412   if (iascii || isdraw || isbinary || issocket) {
1413     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1414   }
1415   PetscFunctionReturn(0);
1416 }
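/* A minimal usage sketch: ASCII, draw, socket and binary viewers are all funneled through the
   dispatch routine above, e.g.
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
   or, at run time, via the -mat_view option. */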
1417 
1418 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1419 {
1420   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1421   PetscErrorCode ierr;
1422   Vec            bb1 = 0;
1423   PetscBool      hasop;
1424 
1425   PetscFunctionBegin;
1426   if (flag == SOR_APPLY_UPPER) {
1427     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1428     PetscFunctionReturn(0);
1429   }
1430 
1431   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1432     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1433   }
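  /* Parallel strategy (process-block Jacobi combined with local SOR): after the optional first sweep
     with a zero initial guess, each remaining outer iteration scatters the current solution into the
     ghost vector, folds the off-process coupling into the right-hand side as bb1 = bb - B*x, and then
     applies 'lits' SOR sweeps to the local diagonal block A; hence only the SOR_LOCAL_* variants
     (and SOR_EISENSTAT) are supported in parallel. */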
1434 
1435   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1436     if (flag & SOR_ZERO_INITIAL_GUESS) {
1437       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1438       its--;
1439     }
1440 
1441     while (its--) {
1442       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1444 
1445       /* update rhs: bb1 = bb - B*x */
1446       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1447       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1448 
1449       /* local sweep */
1450       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1451     }
1452   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1453     if (flag & SOR_ZERO_INITIAL_GUESS) {
1454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455       its--;
1456     }
1457     while (its--) {
1458       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460 
1461       /* update rhs: bb1 = bb - B*x */
1462       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1463       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1464 
1465       /* local sweep */
1466       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1467     }
1468   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1469     if (flag & SOR_ZERO_INITIAL_GUESS) {
1470       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471       its--;
1472     }
1473     while (its--) {
1474       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1475       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476 
1477       /* update rhs: bb1 = bb - B*x */
1478       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1479       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1480 
1481       /* local sweep */
1482       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1483     }
1484   } else if (flag & SOR_EISENSTAT) {
1485     Vec xx1;
1486 
1487     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1488     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1489 
1490     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492     if (!mat->diag) {
1493       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1494       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1495     }
1496     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1497     if (hasop) {
1498       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1499     } else {
1500       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1501     }
1502     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1503 
1504     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1505 
1506     /* local sweep */
1507     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1508     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1509     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1510   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1511 
1512   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1513 
1514   matin->factorerrortype = mat->A->factorerrortype;
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1519 {
1520   Mat            aA,aB,Aperm;
1521   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1522   PetscScalar    *aa,*ba;
1523   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1524   PetscSF        rowsf,sf;
1525   IS             parcolp = NULL;
1526   PetscBool      done;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1531   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1532   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1533   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1534 
1535   /* Invert row permutation to find out where my rows should go */
1536   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1537   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1538   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1539   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1540   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1541   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1542 
1543   /* Invert column permutation to find out where my columns should go */
1544   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1545   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1546   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1547   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1548   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1549   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1550   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1551 
1552   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1553   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1554   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1555 
1556   /* Find out where my gcols should go */
1557   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1558   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1562   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1563   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1564   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1565 
1566   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1567   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1568   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1569   for (i=0; i<m; i++) {
1570     PetscInt row = rdest[i],rowner;
1571     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1572     for (j=ai[i]; j<ai[i+1]; j++) {
1573       PetscInt cowner,col = cdest[aj[j]];
1574       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1575       if (rowner == cowner) dnnz[i]++;
1576       else onnz[i]++;
1577     }
1578     for (j=bi[i]; j<bi[i+1]; j++) {
1579       PetscInt cowner,col = gcdest[bj[j]];
1580       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1581       if (rowner == cowner) dnnz[i]++;
1582       else onnz[i]++;
1583     }
1584   }
1585   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1587   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1588   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1589   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1590 
1591   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1592   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1593   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) {
1595     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1596     PetscInt j0,rowlen;
1597     rowlen = ai[i+1] - ai[i];
1598     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than m, the length of the repurposed scratch arrays, so insert the values in batches of at most m */
1599       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1600       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1601     }
1602     rowlen = bi[i+1] - bi[i];
1603     for (j0=j=0; j<rowlen; j0=j) {
1604       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1605       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1606     }
1607   }
1608   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1609   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1610   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1612   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1613   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1614   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1616   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1617   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1618   *B = Aperm;
1619   PetscFunctionReturn(0);
1620 }
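/* A minimal usage sketch (rowp and colp are assumed to be index sets describing the desired row and
   column permutations, distributed consistently with A's layout):
     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/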
1621 
1622 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1623 {
1624   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1625   PetscErrorCode ierr;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1629   if (ghosts) *ghosts = aij->garray;
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1634 {
1635   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1636   Mat            A    = mat->A,B = mat->B;
1637   PetscErrorCode ierr;
1638   PetscReal      isend[5],irecv[5];
1639 
1640   PetscFunctionBegin;
1641   info->block_size = 1.0;
1642   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1643 
1644   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1645   isend[3] = info->memory;  isend[4] = info->mallocs;
1646 
1647   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1648 
1649   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1650   isend[3] += info->memory;  isend[4] += info->mallocs;
1651   if (flag == MAT_LOCAL) {
1652     info->nz_used      = isend[0];
1653     info->nz_allocated = isend[1];
1654     info->nz_unneeded  = isend[2];
1655     info->memory       = isend[3];
1656     info->mallocs      = isend[4];
1657   } else if (flag == MAT_GLOBAL_MAX) {
1658     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1659 
1660     info->nz_used      = irecv[0];
1661     info->nz_allocated = irecv[1];
1662     info->nz_unneeded  = irecv[2];
1663     info->memory       = irecv[3];
1664     info->mallocs      = irecv[4];
1665   } else if (flag == MAT_GLOBAL_SUM) {
1666     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1667 
1668     info->nz_used      = irecv[0];
1669     info->nz_allocated = irecv[1];
1670     info->nz_unneeded  = irecv[2];
1671     info->memory       = irecv[3];
1672     info->mallocs      = irecv[4];
1673   }
1674   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1675   info->fill_ratio_needed = 0;
1676   info->factor_mallocs    = 0;
1677   PetscFunctionReturn(0);
1678 }
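/* A minimal usage sketch: MAT_LOCAL, MAT_GLOBAL_MAX and MAT_GLOBAL_SUM select how the per-process
   numbers gathered above are combined, e.g.
     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros used: %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/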
1679 
1680 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1681 {
1682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   switch (op) {
1687   case MAT_NEW_NONZERO_LOCATIONS:
1688   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1689   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1690   case MAT_KEEP_NONZERO_PATTERN:
1691   case MAT_NEW_NONZERO_LOCATION_ERR:
1692   case MAT_USE_INODES:
1693   case MAT_IGNORE_ZERO_ENTRIES:
1694     MatCheckPreallocated(A,1);
1695     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1696     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1697     break;
1698   case MAT_ROW_ORIENTED:
1699     MatCheckPreallocated(A,1);
1700     a->roworiented = flg;
1701 
1702     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1703     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1704     break;
1705   case MAT_NEW_DIAGONALS:
1706     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   case MAT_SPD:
1712     A->spd_set = PETSC_TRUE;
1713     A->spd     = flg;
1714     if (flg) {
1715       A->symmetric                  = PETSC_TRUE;
1716       A->structurally_symmetric     = PETSC_TRUE;
1717       A->symmetric_set              = PETSC_TRUE;
1718       A->structurally_symmetric_set = PETSC_TRUE;
1719     }
1720     break;
1721   case MAT_SYMMETRIC:
1722     MatCheckPreallocated(A,1);
1723     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_STRUCTURALLY_SYMMETRIC:
1726     MatCheckPreallocated(A,1);
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_HERMITIAN:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_SYMMETRY_ETERNAL:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_SUBMAT_SINGLEIS:
1738     A->submat_singleis = flg;
1739     break;
1740   case MAT_STRUCTURE_ONLY:
1741     /* The option is handled directly by MatSetOption() */
1742     break;
1743   default:
1744     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1750 {
1751   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1752   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1753   PetscErrorCode ierr;
1754   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1755   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1756   PetscInt       *cmap,*idx_p;
1757 
1758   PetscFunctionBegin;
1759   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1760   mat->getrowactive = PETSC_TRUE;
1761 
1762   if (!mat->rowvalues && (idx || v)) {
1763     /*
1764         allocate enough space to hold information from the longest row.
1765     */
1766     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1767     PetscInt   max = 1,tmp;
1768     for (i=0; i<matin->rmap->n; i++) {
1769       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1770       if (max < tmp) max = tmp;
1771     }
1772     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1773   }
1774 
1775   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1776   lrow = row - rstart;
1777 
1778   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1779   if (!v)   {pvA = 0; pvB = 0;}
1780   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1781   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1782   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1783   nztot = nzA + nzB;
1784 
1785   cmap = mat->garray;
1786   if (v  || idx) {
1787     if (nztot) {
1788       /* Sort by increasing column numbers, assuming A and B already sorted */
1789       PetscInt imark = -1;
1790       if (v) {
1791         *v = v_p = mat->rowvalues;
1792         for (i=0; i<nzB; i++) {
1793           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1794           else break;
1795         }
1796         imark = i;
1797         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1798         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1799       }
1800       if (idx) {
1801         *idx = idx_p = mat->rowindices;
1802         if (imark > -1) {
1803           for (i=0; i<imark; i++) {
1804             idx_p[i] = cmap[cworkB[i]];
1805           }
1806         } else {
1807           for (i=0; i<nzB; i++) {
1808             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1809             else break;
1810           }
1811           imark = i;
1812         }
1813         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1814         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1815       }
1816     } else {
1817       if (idx) *idx = 0;
1818       if (v)   *v   = 0;
1819     }
1820   }
1821   *nz  = nztot;
1822   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1823   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1828 {
1829   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1830 
1831   PetscFunctionBegin;
1832   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1833   aij->getrowactive = PETSC_FALSE;
1834   PetscFunctionReturn(0);
1835 }
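/* A minimal usage sketch of the interface served by the two routines above; only locally owned rows
   may be requested (see the ownership-range check in MatGetRow_MPIAIJ()):
     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... examine cols[] and vals[] ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/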
1836 
1837 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1838 {
1839   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1840   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1841   PetscErrorCode ierr;
1842   PetscInt       i,j,cstart = mat->cmap->rstart;
1843   PetscReal      sum = 0.0;
1844   MatScalar      *v;
1845 
1846   PetscFunctionBegin;
1847   if (aij->size == 1) {
1848     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1849   } else {
1850     if (type == NORM_FROBENIUS) {
1851       v = amat->a;
1852       for (i=0; i<amat->nz; i++) {
1853         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1854       }
1855       v = bmat->a;
1856       for (i=0; i<bmat->nz; i++) {
1857         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1858       }
1859       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1860       *norm = PetscSqrtReal(*norm);
1861       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1862     } else if (type == NORM_1) { /* max column norm */
1863       PetscReal *tmp,*tmp2;
1864       PetscInt  *jj,*garray = aij->garray;
1865       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1866       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1867       *norm = 0.0;
1868       v     = amat->a; jj = amat->j;
1869       for (j=0; j<amat->nz; j++) {
1870         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1871       }
1872       v = bmat->a; jj = bmat->j;
1873       for (j=0; j<bmat->nz; j++) {
1874         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1875       }
1876       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1877       for (j=0; j<mat->cmap->N; j++) {
1878         if (tmp2[j] > *norm) *norm = tmp2[j];
1879       }
1880       ierr = PetscFree(tmp);CHKERRQ(ierr);
1881       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1882       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1883     } else if (type == NORM_INFINITY) { /* max row norm */
1884       PetscReal ntemp = 0.0;
1885       for (j=0; j<aij->A->rmap->n; j++) {
1886         v   = amat->a + amat->i[j];
1887         sum = 0.0;
1888         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1889           sum += PetscAbsScalar(*v); v++;
1890         }
1891         v = bmat->a + bmat->i[j];
1892         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         if (sum > ntemp) ntemp = sum;
1896       }
1897       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1899     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1900   }
1901   PetscFunctionReturn(0);
1902 }
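/* Norms supported above: NORM_FROBENIUS = sqrt(sum_ij |a_ij|^2), NORM_1 = max_j sum_i |a_ij|
   (largest column sum) and NORM_INFINITY = max_i sum_j |a_ij| (largest row sum); the two norm is
   not available for this matrix type. */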
1903 
1904 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1905 {
1906   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1907   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1908   PetscErrorCode ierr;
1909   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1910   PetscInt       cstart = A->cmap->rstart,ncol;
1911   Mat            B;
1912   MatScalar      *array;
1913 
1914   PetscFunctionBegin;
1915   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1916 
1917   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1918   ai = Aloc->i; aj = Aloc->j;
1919   bi = Bloc->i; bj = Bloc->j;
1920   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1921     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1922     PetscSFNode          *oloc;
1923     PETSC_UNUSED PetscSF sf;
1924 
1925     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1926     /* compute d_nnz for preallocation */
1927     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1928     for (i=0; i<ai[ma]; i++) {
1929       d_nnz[aj[i]]++;
1930       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1931     }
1932     /* compute local off-diagonal contributions */
1933     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1934     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1935     /* map those to global */
1936     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1937     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1938     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1939     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1940     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1941     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1942     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1943 
1944     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1945     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1946     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1947     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1948     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1949     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1950   } else {
1951     B    = *matout;
1952     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1953     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954   }
1955 
1956   /* copy over the A part */
1957   array = Aloc->a;
1958   row   = A->rmap->rstart;
1959   for (i=0; i<ma; i++) {
1960     ncol = ai[i+1]-ai[i];
1961     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1962     row++;
1963     array += ncol; aj += ncol;
1964   }
1965   aj = Aloc->j;
1966   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1967 
1968   /* copy over the B part */
1969   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1970   array = Bloc->a;
1971   row   = A->rmap->rstart;
1972   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1973   cols_tmp = cols;
1974   for (i=0; i<mb; i++) {
1975     ncol = bi[i+1]-bi[i];
1976     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++;
1978     array += ncol; cols_tmp += ncol;
1979   }
1980   ierr = PetscFree(cols);CHKERRQ(ierr);
1981 
1982   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1985     *matout = B;
1986   } else {
1987     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1993 {
1994   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1995   Mat            a    = aij->A,b = aij->B;
1996   PetscErrorCode ierr;
1997   PetscInt       s1,s2,s3;
1998 
1999   PetscFunctionBegin;
2000   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2001   if (rr) {
2002     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2003     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2004     /* Overlap communication with computation. */
2005     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2006   }
2007   if (ll) {
2008     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2009     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2010     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2011   }
2012   /* scale the diagonal block */
2013   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2014 
2015   if (rr) {
2016     /* Do a scatter end and then right scale the off-diagonal block */
2017     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2019   }
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2024 {
2025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2034 {
2035   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2036   Mat            a,b,c,d;
2037   PetscBool      flg;
2038   PetscErrorCode ierr;
2039 
2040   PetscFunctionBegin;
2041   a = matA->A; b = matA->B;
2042   c = matB->A; d = matB->B;
2043 
2044   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2045   if (flg) {
2046     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2047   }
2048   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2053 {
2054   PetscErrorCode ierr;
2055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2056   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2057 
2058   PetscFunctionBegin;
2059   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2060   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2061     /* because of the column compression in the off-processor part of the matrix a->B,
2062        the number of columns in a->B and b->B may be different, hence we cannot call
2063        MatCopy() directly on the two parts. If need be, we can provide a more
2064        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
2065        and then copying the submatrices */
2066     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2067   } else {
2068     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2069     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2070   }
2071   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2076 {
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 /*
2085    Computes the number of nonzeros per row needed for preallocation when X and Y
2086    have different nonzero structure.
2087 */
2088 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2089 {
2090   PetscInt       i,j,k,nzx,nzy;
2091 
2092   PetscFunctionBegin;
2093   /* Set the number of nonzeros in the new matrix */
2094   for (i=0; i<m; i++) {
2095     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2096     nzx = xi[i+1] - xi[i];
2097     nzy = yi[i+1] - yi[i];
2098     nnz[i] = 0;
2099     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2100       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2101       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2102       nnz[i]++;
2103     }
2104     for (; k<nzy; k++) nnz[i]++;
2105   }
2106   PetscFunctionReturn(0);
2107 }
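/* For example, if row i of X has global columns {0,3,7} and row i of Y has {3,5}, the merge above
   counts each member of the union {0,3,5,7} exactly once, giving nnz[i] = 4. */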
2108 
2109 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2110 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2111 {
2112   PetscErrorCode ierr;
2113   PetscInt       m = Y->rmap->N;
2114   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2115   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2116 
2117   PetscFunctionBegin;
2118   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x    = (Mat_SeqAIJ*)xx->A->data;
2133     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2134     y    = (Mat_SeqAIJ*)yy->A->data;
2135     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2136     x    = (Mat_SeqAIJ*)xx->B->data;
2137     y    = (Mat_SeqAIJ*)yy->B->data;
2138     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2141   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X form a subset of those of Y */
2142     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2143   } else {
2144     Mat      B;
2145     PetscInt *nnz_d,*nnz_o;
2146     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2147     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2148     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2149     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2150     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2151     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2152     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2153     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2154     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2155     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2156     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2157     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2158     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2159     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2160   }
2161   PetscFunctionReturn(0);
2162 }
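/* Summary of the three cases above: SAME_NONZERO_PATTERN adds the value arrays of X directly into Y
   with BLAS axpy, SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and any other structure
   preallocates a fresh matrix for the union pattern and replaces Y's header with it. */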
2163 
2164 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2165 
2166 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2167 {
2168 #if defined(PETSC_USE_COMPLEX)
2169   PetscErrorCode ierr;
2170   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2174   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2175 #else
2176   PetscFunctionBegin;
2177 #endif
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2182 {
2183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2188   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2193 {
2194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2199   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207   PetscInt       i,*idxb = 0;
2208   PetscScalar    *va,*vb;
2209   Vec            vtmp;
2210 
2211   PetscFunctionBegin;
2212   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2213   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2214   if (idx) {
2215     for (i=0; i<A->rmap->n; i++) {
2216       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2217     }
2218   }
2219 
2220   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2221   if (idx) {
2222     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2223   }
2224   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2225   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2226 
2227   for (i=0; i<A->rmap->n; i++) {
2228     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2229       va[i] = vb[i];
2230       if (idx) idx[i] = a->garray[idxb[i]];
2231     }
2232   }
2233 
2234   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2235   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2236   ierr = PetscFree(idxb);CHKERRQ(ierr);
2237   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2242 {
2243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2244   PetscErrorCode ierr;
2245   PetscInt       i,*idxb = 0;
2246   PetscScalar    *va,*vb;
2247   Vec            vtmp;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2251   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2252   if (idx) {
2253     for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row */
2254       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2255     }
2256   }
2257 
2258   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2259   if (idx) {
2260     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2261   }
2262   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2263   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2264 
2265   for (i=0; i<A->rmap->n; i++) {
2266     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2267       va[i] = vb[i];
2268       if (idx) idx[i] = a->garray[idxb[i]];
2269     }
2270   }
2271 
2272   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2273   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2274   ierr = PetscFree(idxb);CHKERRQ(ierr);
2275   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2280 {
2281   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2282   PetscInt       n      = A->rmap->n;
2283   PetscInt       cstart = A->cmap->rstart;
2284   PetscInt       *cmap  = mat->garray;
2285   PetscInt       *diagIdx, *offdiagIdx;
2286   Vec            diagV, offdiagV;
2287   PetscScalar    *a, *diagA, *offdiagA;
2288   PetscInt       r;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2293   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2294   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2295   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2296   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2297   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2298   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2299   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2300   for (r = 0; r < n; ++r) {
2301     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2302       a[r]   = diagA[r];
2303       idx[r] = cstart + diagIdx[r];
2304     } else {
2305       a[r]   = offdiagA[r];
2306       idx[r] = cmap[offdiagIdx[r]];
2307     }
2308   }
2309   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2310   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2311   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2312   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2313   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2314   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2319 {
2320   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2321   PetscInt       n      = A->rmap->n;
2322   PetscInt       cstart = A->cmap->rstart;
2323   PetscInt       *cmap  = mat->garray;
2324   PetscInt       *diagIdx, *offdiagIdx;
2325   Vec            diagV, offdiagV;
2326   PetscScalar    *a, *diagA, *offdiagA;
2327   PetscInt       r;
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2332   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2333   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2334   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2335   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2337   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2338   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2339   for (r = 0; r < n; ++r) {
2340     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2341       a[r]   = diagA[r];
2342       idx[r] = cstart + diagIdx[r];
2343     } else {
2344       a[r]   = offdiagA[r];
2345       idx[r] = cmap[offdiagIdx[r]];
2346     }
2347   }
2348   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2349   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2350   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2351   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2352   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2353   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2358 {
2359   PetscErrorCode ierr;
2360   Mat            *dummy;
2361 
2362   PetscFunctionBegin;
2363   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2364   *newmat = *dummy;
2365   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2370 {
2371   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2376   A->factorerrortype = a->A->factorerrortype;
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2381 {
2382   PetscErrorCode ierr;
2383   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2387   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2394 {
2395   PetscFunctionBegin;
2396   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2397   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 /*@
2402    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2403 
2404    Collective on Mat
2405 
2406    Input Parameters:
2407 +    A - the matrix
2408 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2409 
2410    Level: advanced
2411 
2412 @*/
2413 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2414 {
2415   PetscErrorCode       ierr;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
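/* A minimal usage sketch (call after the matrix type has been set, e.g. with MatSetType(A,MATMPIAIJ),
   so that the method is composed on the matrix):
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   The same switch is reachable from the options database through -mat_increase_overlap_scalable,
   handled in MatSetFromOptions_MPIAIJ() below. */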
2421 
2422 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2423 {
2424   PetscErrorCode       ierr;
2425   PetscBool            sc = PETSC_FALSE,flg;
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2429   /* the enclosing options block is opened and closed by the caller, MatSetFromOptions() */
2430   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2431   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2432   if (flg) {
2433     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2434   }
2435   ierr = PetscOptionsTail();CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2440 {
2441   PetscErrorCode ierr;
2442   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2443   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2444 
2445   PetscFunctionBegin;
2446   if (!Y->preallocated) {
2447     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2448   } else if (!aij->nz) {
2449     PetscInt nonew = aij->nonew;
2450     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2451     aij->nonew = nonew;
2452   }
2453   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2458 {
2459   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2464   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2465   if (d) {
2466     PetscInt rstart;
2467     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2468     *d += rstart;
2469 
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 
2475 /* -------------------------------------------------------------------*/
2476 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2477                                        MatGetRow_MPIAIJ,
2478                                        MatRestoreRow_MPIAIJ,
2479                                        MatMult_MPIAIJ,
2480                                 /* 4*/ MatMultAdd_MPIAIJ,
2481                                        MatMultTranspose_MPIAIJ,
2482                                        MatMultTransposeAdd_MPIAIJ,
2483                                        0,
2484                                        0,
2485                                        0,
2486                                 /*10*/ 0,
2487                                        0,
2488                                        0,
2489                                        MatSOR_MPIAIJ,
2490                                        MatTranspose_MPIAIJ,
2491                                 /*15*/ MatGetInfo_MPIAIJ,
2492                                        MatEqual_MPIAIJ,
2493                                        MatGetDiagonal_MPIAIJ,
2494                                        MatDiagonalScale_MPIAIJ,
2495                                        MatNorm_MPIAIJ,
2496                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2497                                        MatAssemblyEnd_MPIAIJ,
2498                                        MatSetOption_MPIAIJ,
2499                                        MatZeroEntries_MPIAIJ,
2500                                 /*24*/ MatZeroRows_MPIAIJ,
2501                                        0,
2502                                        0,
2503                                        0,
2504                                        0,
2505                                 /*29*/ MatSetUp_MPIAIJ,
2506                                        0,
2507                                        0,
2508                                        MatGetDiagonalBlock_MPIAIJ,
2509                                        0,
2510                                 /*34*/ MatDuplicate_MPIAIJ,
2511                                        0,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                 /*39*/ MatAXPY_MPIAIJ,
2516                                        MatCreateSubMatrices_MPIAIJ,
2517                                        MatIncreaseOverlap_MPIAIJ,
2518                                        MatGetValues_MPIAIJ,
2519                                        MatCopy_MPIAIJ,
2520                                 /*44*/ MatGetRowMax_MPIAIJ,
2521                                        MatScale_MPIAIJ,
2522                                        MatShift_MPIAIJ,
2523                                        MatDiagonalSet_MPIAIJ,
2524                                        MatZeroRowsColumns_MPIAIJ,
2525                                 /*49*/ MatSetRandom_MPIAIJ,
2526                                        0,
2527                                        0,
2528                                        0,
2529                                        0,
2530                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2531                                        0,
2532                                        MatSetUnfactored_MPIAIJ,
2533                                        MatPermute_MPIAIJ,
2534                                        0,
2535                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2536                                        MatDestroy_MPIAIJ,
2537                                        MatView_MPIAIJ,
2538                                        0,
2539                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2540                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2541                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2546                                        MatGetRowMinAbs_MPIAIJ,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                 /*75*/ MatFDColoringApply_AIJ,
2552                                        MatSetFromOptions_MPIAIJ,
2553                                        0,
2554                                        0,
2555                                        MatFindZeroDiagonals_MPIAIJ,
2556                                 /*80*/ 0,
2557                                        0,
2558                                        0,
2559                                 /*83*/ MatLoad_MPIAIJ,
2560                                        MatIsSymmetric_MPIAIJ,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                        0,
2565                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2566                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2567                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2568                                        MatPtAP_MPIAIJ_MPIAIJ,
2569                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2570                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*99*/ 0,
2576                                        0,
2577                                        0,
2578                                        MatConjugate_MPIAIJ,
2579                                        0,
2580                                 /*104*/MatSetValuesRow_MPIAIJ,
2581                                        MatRealPart_MPIAIJ,
2582                                        MatImaginaryPart_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                 /*109*/0,
2586                                        0,
2587                                        MatGetRowMin_MPIAIJ,
2588                                        0,
2589                                        MatMissingDiagonal_MPIAIJ,
2590                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2591                                        0,
2592                                        MatGetGhosts_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                 /*119*/0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        MatGetMultiProcBlock_MPIAIJ,
2600                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2601                                        MatGetColumnNorms_MPIAIJ,
2602                                        MatInvertBlockDiagonal_MPIAIJ,
2603                                        0,
2604                                        MatCreateSubMatricesMPI_MPIAIJ,
2605                                 /*129*/0,
2606                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2607                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2608                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2609                                        0,
2610                                 /*134*/0,
2611                                        0,
2612                                        MatRARt_MPIAIJ_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*139*/MatSetBlockSizes_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        MatFDColoringSetUp_MPIXAIJ,
2619                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2620                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2621 };
2622 
2623 /* ----------------------------------------------------------------------------------------*/
2624 
2625 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2626 {
2627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2628   PetscErrorCode ierr;
2629 
2630   PetscFunctionBegin;
2631   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2632   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2637 {
2638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2639   PetscErrorCode ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2643   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2648 {
2649   Mat_MPIAIJ     *b;
2650   PetscErrorCode ierr;
2651 
2652   PetscFunctionBegin;
2653   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2654   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2655   b = (Mat_MPIAIJ*)B->data;
2656 
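  /* Release any existing off-process column map, global column array, local work vector and scatter;
     they depend on the previous nonzero pattern and will be rebuilt during assembly */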
2657 #if defined(PETSC_USE_CTABLE)
2658   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2659 #else
2660   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2661 #endif
2662   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2663   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2664   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2665 
2666   /* Because B may have been resized we simply destroy it and create a new one each time */
2667   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2668   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2669   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2670   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2671   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2672   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2673 
2674   if (!B->preallocated) {
2675     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2676     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2677     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2678     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2679     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2680   }
2681 
2682   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2683   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2684   B->preallocated  = PETSC_TRUE;
2685   B->was_assembled = PETSC_FALSE;
2686   B->assembled     = PETSC_FALSE;
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2691 {
2692   Mat_MPIAIJ     *b;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2697   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2698   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2699   b = (Mat_MPIAIJ*)B->data;
2700 
2701 #if defined(PETSC_USE_CTABLE)
2702   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2703 #else
2704   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2705 #endif
2706   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2707   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2708   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2709 
2710   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2711   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2712   B->preallocated  = PETSC_TRUE;
2713   B->was_assembled = PETSC_FALSE;
2714   B->assembled = PETSC_FALSE;
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2719 {
2720   Mat            mat;
2721   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2722   PetscErrorCode ierr;
2723 
2724   PetscFunctionBegin;
2725   *newmat = 0;
2726   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2727   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2728   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2729   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2730   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2731   a       = (Mat_MPIAIJ*)mat->data;
2732 
2733   mat->factortype   = matin->factortype;
2734   mat->assembled    = PETSC_TRUE;
2735   mat->insertmode   = NOT_SET_VALUES;
2736   mat->preallocated = PETSC_TRUE;
2737 
2738   a->size         = oldmat->size;
2739   a->rank         = oldmat->rank;
2740   a->donotstash   = oldmat->donotstash;
2741   a->roworiented  = oldmat->roworiented;
2742   a->rowindices   = 0;
2743   a->rowvalues    = 0;
2744   a->getrowactive = PETSC_FALSE;
2745 
2746   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2747   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2748 
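  /* duplicate the off-process column map (global column number -> local column of B), if the original built one */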
2749   if (oldmat->colmap) {
2750 #if defined(PETSC_USE_CTABLE)
2751     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2752 #else
2753     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2754     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2755     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2756 #endif
2757   } else a->colmap = 0;
2758   if (oldmat->garray) {
2759     PetscInt len;
2760     len  = oldmat->B->cmap->n;
2761     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2762     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2763     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2764   } else a->garray = 0;
2765 
2766   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2767   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2768   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2769   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2770   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2771   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2772   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2773   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2774   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2775   *newmat = mat;
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2780 {
2781   PetscScalar    *vals,*svals;
2782   MPI_Comm       comm;
2783   PetscErrorCode ierr;
2784   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2785   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2786   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2787   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2788   PetscInt       cend,cstart,n,*rowners;
2789   int            fd;
2790   PetscInt       bs = newMat->rmap->bs;
2791 
2792   PetscFunctionBegin;
2793   /* force binary viewer to load .info file if it has not yet done so */
2794   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2795   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2796   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2798   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2801     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2802     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2803   }
2804 
2805   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2806   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808   if (bs < 0) bs = 1;
2809 
2810   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   M    = header[1]; N = header[2];
2812 
2813   /* If global sizes are set, check if they are consistent with that given in the file */
2814   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2815   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2816 
2817   /* determine ownership of all (block) rows */
2818   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2819   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2820   else m = newMat->rmap->n; /* Set by user */
2821 
2822   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2823   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2824 
2825   /* First process needs enough room for process with most rows */
2826   if (!rank) {
2827     mmax = rowners[1];
2828     for (i=2; i<=size; i++) {
2829       mmax = PetscMax(mmax, rowners[i]);
2830     }
2831   } else mmax = -1;             /* unused, but compilers complain */
2832 
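  /* turn the per-process row counts gathered above into a prefix sum, so that
     rowners[p] is the global index of the first row owned by process p */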
2833   rowners[0] = 0;
2834   for (i=2; i<=size; i++) {
2835     rowners[i] += rowners[i-1];
2836   }
2837   rstart = rowners[rank];
2838   rend   = rowners[rank+1];
2839 
2840   /* distribute row lengths to all processors */
2841   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2842   if (!rank) {
2843     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2844     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2845     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2846     for (j=0; j<m; j++) {
2847       procsnz[0] += ourlens[j];
2848     }
2849     for (i=1; i<size; i++) {
2850       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2851       /* calculate the number of nonzeros on each processor */
2852       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2853         procsnz[i] += rowlengths[j];
2854       }
2855       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2858   } else {
2859     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2860   }
2861 
2862   if (!rank) {
2863     /* determine max buffer needed and allocate it */
2864     maxnz = 0;
2865     for (i=0; i<size; i++) {
2866       maxnz = PetscMax(maxnz,procsnz[i]);
2867     }
2868     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2869 
2870     /* read in my part of the matrix column indices  */
2871     nz   = procsnz[0];
2872     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2873     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2874 
2875     /* read in everyone else's part and ship it off */
2876     for (i=1; i<size; i++) {
2877       nz   = procsnz[i];
2878       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2879       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2880     }
2881     ierr = PetscFree(cols);CHKERRQ(ierr);
2882   } else {
2883     /* determine buffer space needed for message */
2884     nz = 0;
2885     for (i=0; i<m; i++) {
2886       nz += ourlens[i];
2887     }
2888     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2889 
2890     /* receive message of column indices */
2891     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   /* determine column ownership if matrix is not square */
2895   if (N != M) {
2896     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2897     else n = newMat->cmap->n;
2898     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2899     cstart = cend - n;
2900   } else {
2901     cstart = rstart;
2902     cend   = rend;
2903     n      = cend - cstart;
2904   }
2905 
2906   /* loop over local rows, determining number of off diagonal entries */
2907   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2908   jj   = 0;
2909   for (i=0; i<m; i++) {
2910     for (j=0; j<ourlens[i]; j++) {
2911       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2912       jj++;
2913     }
2914   }
2915 
2916   for (i=0; i<m; i++) {
2917     ourlens[i] -= offlens[i];
2918   }
2919   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2920 
2921   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2922 
2923   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2924 
2925   for (i=0; i<m; i++) {
2926     ourlens[i] += offlens[i];
2927   }
2928 
2929   if (!rank) {
2930     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2931 
2932     /* read in my part of the matrix numerical values  */
2933     nz   = procsnz[0];
2934     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935 
2936     /* insert into matrix */
2937     jj      = rstart;
2938     smycols = mycols;
2939     svals   = vals;
2940     for (i=0; i<m; i++) {
2941       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2942       smycols += ourlens[i];
2943       svals   += ourlens[i];
2944       jj++;
2945     }
2946 
2947     /* read in other processors and ship out */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2954   } else {
2955     /* receive numeric values */
2956     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* receive message of values */
2959     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971   }
2972   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2973   ierr = PetscFree(vals);CHKERRQ(ierr);
2974   ierr = PetscFree(mycols);CHKERRQ(ierr);
2975   ierr = PetscFree(rowners);CHKERRQ(ierr);
2976   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 /* Not scalable because of ISAllGather() unless getting all columns. */
2982 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2983 {
2984   PetscErrorCode ierr;
2985   IS             iscol_local;
2986   PetscBool      isstride;
2987   PetscMPIInt    lisstride=0,gisstride;
2988 
2989   PetscFunctionBegin;
2990   /* check if we are grabbing all columns*/
2991   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2992 
2993   if (isstride) {
2994     PetscInt  start,len,mstart,mlen;
2995     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2996     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2997     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2998     if (mstart == start && mlen-mstart == len) lisstride = 1;
2999   }
3000 
3001   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3002   if (gisstride) {
3003     PetscInt N;
3004     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3005     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3006     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3007     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3008   } else {
3009     PetscInt cbs;
3010     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3011     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3012     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3013   }
3014 
3015   *isseq = iscol_local;
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 /*
3020  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3021  (see MatCreateSubMatrix_MPIAIJ_nonscalable())
3022 
3023  Input Parameters:
3024    mat - matrix
3025    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3026            i.e., mat->rstart <= isrow[i] < mat->rend
3027    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3028            i.e., mat->cstart <= iscol[i] < mat->cend
3029  Output Parameters:
3030    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3031    iscol_o - sequential column index set for retrieving mat->B
3032    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3033  */
3034 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3035 {
3036   PetscErrorCode ierr;
3037   Vec            x,cmap;
3038   const PetscInt *is_idx;
3039   PetscScalar    *xarray,*cmaparray;
3040   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3041   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3042   Mat            B=a->B;
3043   Vec            lvec=a->lvec,lcmap;
3044   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3045   MPI_Comm       comm;
3046   PetscMPIInt    rank;
3047   VecScatter     Mvctx;
3048 
3049   PetscFunctionBegin;
3050   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3052   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3053 
3054   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3055   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3056   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3057   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3058 
3059   /* Get start indices */
3060   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3061   isstart -= ncols;
3062   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3063 
3064   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3065   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3066   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3067   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3068   for (i=0; i<ncols; i++) {
3069     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3070     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3071     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3072   }
3073   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3074   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3075   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3076 
3077   /* Get iscol_d */
3078   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3079   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3080   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3081 
3082   /* Get isrow_d */
3083   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3084   rstart = mat->rmap->rstart;
3085   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3086   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3087   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3088   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3089 
3090   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3091   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3092   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3093 
3094   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3095   if (!a->Mvctx_mpi1) {
3096     /* a->Mvctx can produce an inconsistent 'count' in optimized builds; see src/mat/examples/tests/runex59_2 */
3097     a->Mvctx_mpi1_flg = PETSC_TRUE;
3098     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
3099   }
3100   Mvctx = a->Mvctx_mpi1;
3101   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3102 
3103   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3104 
3105   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3106   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3107   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3108 
3109   /* (3) create sequential iscol_o (a subset of iscol) and garray */
3110   /* off-process column indices */
3111   count = 0;
3112   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3113   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3114 
3115   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3116   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3117   for (i=0; i<Bn; i++) {
3118     if (PetscRealPart(xarray[i]) > -1.0) {
3119       idx[count]     = i;                   /* local column index in off-diagonal part B */
3120       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3121     }
3122   }
3123   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3124   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3125   /* printf("[%d] count %d\n",rank,count); */
3126   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3127   /* cannot ensure iscol_o has same blocksize as iscol! */
3128 
3129   ierr = PetscFree(idx);CHKERRQ(ierr);
3130 
3131   *garray = cmap1;
3132 
3133   ierr = VecDestroy(&x);CHKERRQ(ierr);
3134   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3135   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3136   PetscFunctionReturn(0);
3137 }
3138 
3139 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3140 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3141 {
3142   PetscErrorCode ierr;
3143   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3144   Mat            M = NULL;
3145   MPI_Comm       comm;
3146   IS             iscol_d,isrow_d,iscol_o;
3147   Mat            Asub = NULL,Bsub = NULL;
3148   PetscInt       n;
3149 
3150   PetscFunctionBegin;
3151   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3152 
3153   if (call == MAT_REUSE_MATRIX) {
3154     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3155     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3156     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3157 
3158     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3159     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3160 
3161     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3162     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3163 
3164     /* Update diagonal and off-diagonal portions of submat */
3165     asub = (Mat_MPIAIJ*)(*submat)->data;
3166     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3167     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3168     if (n) {
3169       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3170     }
3171     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3172     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3173 
3174   } else { /* call == MAT_INITIAL_MATRIX */
3175     const PetscInt *garray;
3176     PetscInt        BsubN;
3177 
3178     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3179     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3180 
3181     /* Create local submatrices Asub and Bsub */
3182     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3183     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3184 
3185     /* Create submatrix M */
3186     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3187 
3188     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3189     asub = (Mat_MPIAIJ*)M->data;
3190 
3191     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3192     n = asub->B->cmap->N;
3193     if (BsubN > n) {
3194       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3195       const PetscInt *idx;
3196       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3197       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3198 
3199       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3200       j = 0;
3201       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3202       for (i=0; i<n; i++) {
3203         if (j >= BsubN) break;
3204         while (subgarray[i] > garray[j]) j++;
3205 
3206         if (subgarray[i] == garray[j]) {
3207           idx_new[i] = idx[j++];
3208         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3209       }
3210       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3211 
3212       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3213       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3214 
3215     } else if (BsubN < n) {
3216       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than those of B (%D)",BsubN,asub->B->cmap->N);
3217     }
3218 
3219     ierr = PetscFree(garray);CHKERRQ(ierr);
3220     *submat = M;
3221 
3222     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3223     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3224     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3225 
3226     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3227     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3228 
3229     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3230     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3231   }
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3236 {
3237   PetscErrorCode ierr;
3238   IS             iscol_local=NULL,isrow_d;
3239   PetscInt       csize;
3240   PetscInt       n,i,j,start,end;
3241   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3242   MPI_Comm       comm;
3243 
3244   PetscFunctionBegin;
3245   /* If isrow has same processor distribution as mat,
3246      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3247   if (call == MAT_REUSE_MATRIX) {
3248     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3249     if (isrow_d) {
3250       sameRowDist  = PETSC_TRUE;
3251       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3252     } else {
3253       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3254       if (iscol_local) {
3255         sameRowDist  = PETSC_TRUE;
3256         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3257       }
3258     }
3259   } else {
3260     /* Check if isrow has same processor distribution as mat */
3261     sameDist[0] = PETSC_FALSE;
3262     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3263     if (!n) {
3264       sameDist[0] = PETSC_TRUE;
3265     } else {
3266       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3267       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3268       if (i >= start && j < end) {
3269         sameDist[0] = PETSC_TRUE;
3270       }
3271     }
3272 
3273     /* Check if iscol has same processor distribution as mat */
3274     sameDist[1] = PETSC_FALSE;
3275     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3276     if (!n) {
3277       sameDist[1] = PETSC_TRUE;
3278     } else {
3279       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3280       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3281       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3282     }
3283 
3284     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3285     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3286     sameRowDist = tsameDist[0];
3287   }
3288 
3289   if (sameRowDist) {
3290     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3291       /* isrow and iscol have same processor distribution as mat */
3292       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3293       PetscFunctionReturn(0);
3294     } else { /* sameRowDist */
3295       /* isrow has same processor distribution as mat */
3296       if (call == MAT_INITIAL_MATRIX) {
3297         PetscBool sorted;
3298         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3299         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3300         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3301         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3302 
3303         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3304         if (sorted) {
3305           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3306           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3307           PetscFunctionReturn(0);
3308         }
3309       } else { /* call == MAT_REUSE_MATRIX */
3310         IS    iscol_sub;
3311         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3312         if (iscol_sub) {
3313           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3314           PetscFunctionReturn(0);
3315         }
3316       }
3317     }
3318   }
3319 
3320   /* General case: iscol -> iscol_local which has global size of iscol */
3321   if (call == MAT_REUSE_MATRIX) {
3322     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3323     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3324   } else {
3325     if (!iscol_local) {
3326       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3327     }
3328   }
3329 
3330   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3331   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3332 
3333   if (call == MAT_INITIAL_MATRIX) {
3334     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3335     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3336   }
3337   PetscFunctionReturn(0);
3338 }
3339 
3340 /*@C
3341      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3342          and "off-diagonal" part of the matrix in CSR format.
3343 
3344    Collective on MPI_Comm
3345 
3346    Input Parameters:
3347 +  comm - MPI communicator
3348 .  A - "diagonal" portion of matrix
3349 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3350 -  garray - global indices of B's columns
3351 
3352    Output Parameter:
3353 .   mat - the matrix, with input A as its local diagonal matrix
3354    Level: advanced
3355 
3356    Notes:
3357        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3358        A becomes part of the output mat; B is destroyed by this routine, so the caller must not use A or B afterwards.
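
       A minimal usage sketch (illustrative only; it assumes each process has already built a SeqAIJ
     diagonal block A, a SeqAIJ off-diagonal block B with nB columns, and the matching garray of length nB):

$       Mat      A,B,C;
$       PetscInt *garray;
$       /* ... build A (m x n) and B (m x nB) as MATSEQAIJ, and fill garray[] ... */
$       ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);
$       /* A and B now belong to C and may not be used (or destroyed) by the caller */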
3359 
3360 .seealso: MatCreateMPIAIJWithSplitArrays()
3361 @*/
3362 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3363 {
3364   PetscErrorCode ierr;
3365   Mat_MPIAIJ     *maij;
3366   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3367   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3368   PetscScalar    *oa=b->a;
3369   Mat            Bnew;
3370   PetscInt       m,n,N;
3371 
3372   PetscFunctionBegin;
3373   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3374   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3375   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3376   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3377   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3378   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3379 
3380   /* Get global columns of mat */
3381   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3382 
3383   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3384   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3385   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3386   maij = (Mat_MPIAIJ*)(*mat)->data;
3387 
3388   (*mat)->preallocated = PETSC_TRUE;
3389 
3390   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3391   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3392 
3393   /* Set A as diagonal portion of *mat */
3394   maij->A = A;
3395 
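  /* Map B's local (compressed) column indices to global column indices via garray,
     so that Bnew below can be created over the full global column space */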
3396   nz = oi[m];
3397   for (i=0; i<nz; i++) {
3398     col   = oj[i];
3399     oj[i] = garray[col];
3400   }
3401 
3402   /* Set Bnew as off-diagonal portion of *mat */
3403   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3404   bnew        = (Mat_SeqAIJ*)Bnew->data;
3405   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3406   maij->B     = Bnew;
3407 
3408   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3409 
3410   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3411   b->free_a       = PETSC_FALSE;
3412   b->free_ij      = PETSC_FALSE;
3413   ierr = MatDestroy(&B);CHKERRQ(ierr);
3414 
3415   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3416   bnew->free_a       = PETSC_TRUE;
3417   bnew->free_ij      = PETSC_TRUE;
3418 
3419   /* condense columns of maij->B */
3420   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3421   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3422   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3423   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3424   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3425   PetscFunctionReturn(0);
3426 }
3427 
3428 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3429 
3430 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3431 {
3432   PetscErrorCode ierr;
3433   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3434   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3435   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3436   Mat            M,Msub,B=a->B;
3437   MatScalar      *aa;
3438   Mat_SeqAIJ     *aij;
3439   PetscInt       *garray = a->garray,*colsub,Ncols;
3440   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3441   IS             iscol_sub,iscmap;
3442   const PetscInt *is_idx,*cmap;
3443   PetscBool      allcolumns=PETSC_FALSE;
3444   MPI_Comm       comm;
3445 
3446   PetscFunctionBegin;
3447   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3448 
3449   if (call == MAT_REUSE_MATRIX) {
3450     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3451     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3452     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3453 
3454     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3455     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3456 
3457     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3458     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3459 
3460     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3461 
3462   } else { /* call == MAT_INITIAL_MATRIX */
3463     PetscBool flg;
3464 
3465     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3466     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3467 
3468     /* (1) iscol -> nonscalable iscol_local */
3469     /* Check for special case: each processor gets entire matrix columns */
3470     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3471     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3472     if (allcolumns) {
3473       iscol_sub = iscol_local;
3474       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3475       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3476 
3477     } else {
3478       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3479       PetscInt *idx,*cmap1,k;
3480       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3481       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3482       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3483       count = 0;
3484       k     = 0;
3485       for (i=0; i<Ncols; i++) {
3486         j = is_idx[i];
3487         if (j >= cstart && j < cend) {
3488           /* diagonal part of mat */
3489           idx[count]     = j;
3490           cmap1[count++] = i; /* column index in submat */
3491         } else if (Bn) {
3492           /* off-diagonal part of mat */
3493           if (j == garray[k]) {
3494             idx[count]     = j;
3495             cmap1[count++] = i;  /* column index in submat */
3496           } else if (j > garray[k]) {
3497             while (j > garray[k] && k < Bn-1) k++;
3498             if (j == garray[k]) {
3499               idx[count]     = j;
3500               cmap1[count++] = i; /* column index in submat */
3501             }
3502           }
3503         }
3504       }
3505       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3506 
3507       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3508       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3509       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3510 
3511       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3512     }
3513 
3514     /* (3) Create sequential Msub */
3515     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3516   }
3517 
3518   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3519   aij  = (Mat_SeqAIJ*)(Msub)->data;
3520   ii   = aij->i;
3521   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3522 
3523   /*
3524       m - number of local rows
3525       Ncols - number of columns (same on all processors)
3526       rstart - first row in new global matrix generated
3527   */
3528   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3529 
3530   if (call == MAT_INITIAL_MATRIX) {
3531     /* (4) Create parallel newmat */
3532     PetscMPIInt    rank,size;
3533     PetscInt       csize;
3534 
3535     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3536     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3537 
3538     /*
3539         Determine the number of non-zeros in the diagonal and off-diagonal
3540         portions of the matrix in order to do correct preallocation
3541     */
3542 
3543     /* first get start and end of "diagonal" columns */
3544     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3545     if (csize == PETSC_DECIDE) {
3546       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3547       if (mglobal == Ncols) { /* square matrix */
3548         nlocal = m;
3549       } else {
3550         nlocal = Ncols/size + ((Ncols % size) > rank);
3551       }
3552     } else {
3553       nlocal = csize;
3554     }
3555     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3556     rstart = rend - nlocal;
3557     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3558 
3559     /* next, compute all the lengths */
3560     jj    = aij->j;
3561     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3562     olens = dlens + m;
3563     for (i=0; i<m; i++) {
3564       jend = ii[i+1] - ii[i];
3565       olen = 0;
3566       dlen = 0;
3567       for (j=0; j<jend; j++) {
3568         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3569         else dlen++;
3570         jj++;
3571       }
3572       olens[i] = olen;
3573       dlens[i] = dlen;
3574     }
3575 
3576     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3577     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3578 
3579     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3580     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3581     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3582     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3583     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3584     ierr = PetscFree(dlens);CHKERRQ(ierr);
3585 
3586   } else { /* call == MAT_REUSE_MATRIX */
3587     M    = *newmat;
3588     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3589     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3590     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3591     /*
3592          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3593        rather than the slower MatSetValues().
3594     */
3595     M->was_assembled = PETSC_TRUE;
3596     M->assembled     = PETSC_FALSE;
3597   }
3598 
3599   /* (5) Set values of Msub to *newmat */
3600   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3601   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3602 
3603   jj   = aij->j;
3604   aa   = aij->a;
3605   for (i=0; i<m; i++) {
3606     row = rstart + i;
3607     nz  = ii[i+1] - ii[i];
3608     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3609     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3610     jj += nz; aa += nz;
3611   }
3612   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3613 
3614   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3615   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3616 
3617   ierr = PetscFree(colsub);CHKERRQ(ierr);
3618 
3619   /* save Msub, iscol_sub and iscmap used in processor for next request */
3620   if (call ==  MAT_INITIAL_MATRIX) {
3621     *newmat = M;
3622     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3623     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3624 
3625     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3626     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3627 
3628     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3629     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3630 
3631     if (iscol_local) {
3632       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3633       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3634     }
3635   }
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 /*
3640     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3641   locally and then the end result by concatenating the local matrices.
3642   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3643 
3644   Note: This requires a sequential iscol with all indices.
3645 */
3646 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3647 {
3648   PetscErrorCode ierr;
3649   PetscMPIInt    rank,size;
3650   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3651   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3652   Mat            M,Mreuse;
3653   MatScalar      *aa,*vwork;
3654   MPI_Comm       comm;
3655   Mat_SeqAIJ     *aij;
3656   PetscBool      colflag,allcolumns=PETSC_FALSE;
3657 
3658   PetscFunctionBegin;
3659   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3660   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3662 
3663   /* Check for special case: each processor gets entire matrix columns */
3664   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3665   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3666   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3667 
3668   if (call ==  MAT_REUSE_MATRIX) {
3669     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3670     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3671     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3672   } else {
3673     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3674   }
3675 
3676   /*
3677       m - number of local rows
3678       n - number of columns (same on all processors)
3679       rstart - first row in new global matrix generated
3680   */
3681   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3682   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3683   if (call == MAT_INITIAL_MATRIX) {
3684     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3685     ii  = aij->i;
3686     jj  = aij->j;
3687 
3688     /*
3689         Determine the number of non-zeros in the diagonal and off-diagonal
3690         portions of the matrix in order to do correct preallocation
3691     */
3692 
3693     /* first get start and end of "diagonal" columns */
3694     if (csize == PETSC_DECIDE) {
3695       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3696       if (mglobal == n) { /* square matrix */
3697         nlocal = m;
3698       } else {
3699         nlocal = n/size + ((n % size) > rank);
3700       }
3701     } else {
3702       nlocal = csize;
3703     }
3704     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3705     rstart = rend - nlocal;
3706     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3707 
3708     /* next, compute all the lengths */
3709     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3710     olens = dlens + m;
3711     for (i=0; i<m; i++) {
3712       jend = ii[i+1] - ii[i];
3713       olen = 0;
3714       dlen = 0;
3715       for (j=0; j<jend; j++) {
3716         if (*jj < rstart || *jj >= rend) olen++;
3717         else dlen++;
3718         jj++;
3719       }
3720       olens[i] = olen;
3721       dlens[i] = dlen;
3722     }
3723     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3724     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3725     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3726     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3727     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3728     ierr = PetscFree(dlens);CHKERRQ(ierr);
3729   } else {
3730     PetscInt ml,nl;
3731 
3732     M    = *newmat;
3733     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3734     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3735     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3736     /*
3737          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3738        rather than the slower MatSetValues().
3739     */
3740     M->was_assembled = PETSC_TRUE;
3741     M->assembled     = PETSC_FALSE;
3742   }
3743   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3744   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3745   ii   = aij->i;
3746   jj   = aij->j;
3747   aa   = aij->a;
3748   for (i=0; i<m; i++) {
3749     row   = rstart + i;
3750     nz    = ii[i+1] - ii[i];
3751     cwork = jj;     jj += nz;
3752     vwork = aa;     aa += nz;
3753     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3754   }
3755 
3756   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3757   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3758   *newmat = M;
3759 
3760   /* save submatrix used in processor for next request */
3761   if (call ==  MAT_INITIAL_MATRIX) {
3762     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3763     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3764   }
3765   PetscFunctionReturn(0);
3766 }
3767 
3768 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3769 {
3770   PetscInt       m,cstart, cend,j,nnz,i,d;
3771   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3772   const PetscInt *JJ;
3773   PetscScalar    *values;
3774   PetscErrorCode ierr;
3775   PetscBool      nooffprocentries;
3776 
3777   PetscFunctionBegin;
3778   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3779 
3780   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3781   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3782   m      = B->rmap->n;
3783   cstart = B->cmap->rstart;
3784   cend   = B->cmap->rend;
3785   rstart = B->rmap->rstart;
3786 
3787   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3788 
3789 #if defined(PETSC_USE_DEBUG)
3790   for (i=0; i<m; i++) {
3791     nnz = Ii[i+1]- Ii[i];
3792     JJ  = J + Ii[i];
3793     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3794     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3795     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3796   }
3797 #endif
3798 
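  /* count, for each local row, how many columns fall in the diagonal block [cstart,cend)
     versus the off-diagonal block, so that d_nnz/o_nnz give an exact preallocation */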
3799   for (i=0; i<m; i++) {
3800     nnz     = Ii[i+1]- Ii[i];
3801     JJ      = J + Ii[i];
3802     nnz_max = PetscMax(nnz_max,nnz);
3803     d       = 0;
3804     for (j=0; j<nnz; j++) {
3805       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3806     }
3807     d_nnz[i] = d;
3808     o_nnz[i] = nnz - d;
3809   }
3810   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3811   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3812 
3813   if (v) values = (PetscScalar*)v;
3814   else {
3815     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3816   }
3817 
3818   for (i=0; i<m; i++) {
3819     ii   = i + rstart;
3820     nnz  = Ii[i+1]- Ii[i];
3821     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3822   }
3823   nooffprocentries    = B->nooffprocentries;
3824   B->nooffprocentries = PETSC_TRUE;
3825   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3826   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3827   B->nooffprocentries = nooffprocentries;
3828 
3829   if (!v) {
3830     ierr = PetscFree(values);CHKERRQ(ierr);
3831   }
3832   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 /*@
3837    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3838    (the default parallel PETSc format).
3839 
3840    Collective on MPI_Comm
3841 
3842    Input Parameters:
3843 +  B - the matrix
3844 .  i - the indices into j for the start of each local row (starts with zero)
3845 .  j - the column indices for each local row (starts with zero)
3846 -  v - optional values in the matrix
3847 
3848    Level: developer
3849 
3850    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices refer to positions within the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown below:
3860 
3861 $        1 0 0
3862 $        2 0 3     P0
3863 $       -------
3864 $        4 5 6     P1
3865 $
3866 $     Process0 [P0]: rows_owned=[0,1]
3867 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3868 $        j =  {0,0,2}  [size = 3]
3869 $        v =  {1,2,3}  [size = 3]
3870 $
3871 $     Process1 [P1]: rows_owned=[2]
3872 $        i =  {0,3}    [size = nrow+1  = 1+1]
3873 $        j =  {0,1,2}  [size = 3]
3874 $        v =  {4,5,6}  [size = 3]
3875 
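     As an illustrative sketch (not a complete program; error checking and the analogous call on process 1 are omitted, and the variable names are only for this example), process 0 above could set up its part of the 3x3 matrix with:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);      /* 2 local rows on process 0 of the 3x3 matrix */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
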
3876 .keywords: matrix, aij, compressed row, sparse, parallel
3877 
3878 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3879           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3880 @*/
3881 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3882 {
3883   PetscErrorCode ierr;
3884 
3885   PetscFunctionBegin;
3886   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3887   PetscFunctionReturn(0);
3888 }
3889 
3890 /*@C
3891    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3892    (the default parallel PETSc format).  For good matrix assembly performance
3893    the user should preallocate the matrix storage by setting the parameters
3894    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3895    performance can be increased by more than a factor of 50.
3896 
3897    Collective on MPI_Comm
3898 
3899    Input Parameters:
3900 +  B - the matrix
3901 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3902            (same value is used for all local rows)
3903 .  d_nnz - array containing the number of nonzeros in the various rows of the
3904            DIAGONAL portion of the local submatrix (possibly different for each row)
3905            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3906            The size of this array is equal to the number of local rows, i.e 'm'.
3907            For matrices that will be factored, you must leave room for (and set)
3908            the diagonal entry even if it is zero.
3909 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3910            submatrix (same value is used for all local rows).
3911 -  o_nnz - array containing the number of nonzeros in the various rows of the
3912            OFF-DIAGONAL portion of the local submatrix (possibly different for
3913            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3914            structure. The size of this array is equal to the number
3915            of local rows, i.e 'm'.
3916 
3917    If the *_nnz parameter is given then the *_nz parameter is ignored
3918 
3919    The AIJ format (also called the Yale sparse matrix format or
3920    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3921    storage.  The stored row and column indices begin with zero.
3922    See Users-Manual: ch_mat for details.
3923 
   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix [m x (N-n)] constitutes the OFF-DIAGONAL portion.
3937 
3938    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3939 
3940    You can call MatGetInfo() to get information on how effective the preallocation was;
3941    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3942    You can also run with the option -info and look for messages with the string
3943    malloc in them to see if additional memory allocation was needed.
3944 
3945    Example usage:
3946 
   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:
3951 
3952 .vb
3953             1  2  0  |  0  3  0  |  0  4
3954     Proc0   0  5  6  |  7  0  0  |  8  0
3955             9  0 10  | 11  0  0  | 12  0
3956     -------------------------------------
3957            13  0 14  | 15 16 17  |  0  0
3958     Proc1   0 18  0  | 19 20 21  |  0  0
3959             0  0  0  | 22 23  0  | 24  0
3960     -------------------------------------
3961     Proc2  25 26 27  |  0  0 28  | 29  0
3962            30  0  0  | 31 32 33  |  0 34
3963 .ve
3964 
3965    This can be represented as a collection of submatrices as:
3966 
3967 .vb
3968       A B C
3969       D E F
3970       G H I
3971 .ve
3972 
   Here the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.
3975 
3976    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3977    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3978    The 'M','N' parameters are 8,8, and have the same values on all procs.
3979 
3980    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3981    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3982    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3986 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
4002 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the pre-allocation is perfect.
4013 
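   As a minimal sketch (not a complete program; error checking omitted, and the variable names are only for this example), the corresponding calls on proc0 would be:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* proc0 values from the table above */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                         /* proc0 owns 3 rows and 3 columns of the 8x8 matrix */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   The other processes make the same calls with their own local sizes and arrays.
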
4014    Level: intermediate
4015 
4016 .keywords: matrix, aij, compressed row, sparse, parallel
4017 
4018 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4019           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4020 @*/
4021 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4022 {
4023   PetscErrorCode ierr;
4024 
4025   PetscFunctionBegin;
4026   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4027   PetscValidType(B,1);
4028   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4029   PetscFunctionReturn(0);
4030 }
4031 
4032 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
         rows in standard CSR format.
4035 
4036    Collective on MPI_Comm
4037 
4038    Input Parameters:
4039 +  comm - MPI communicator
4040 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4046 .   i - row indices
4047 .   j - column indices
4048 -   a - matrix values
4049 
4050    Output Parameter:
4051 .   mat - the matrix
4052 
4053    Level: intermediate
4054 
4055    Notes:
4056        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4057      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4058      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4059 
       The i and j indices are 0 based, and the i indices refer to positions within the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown below:
4065 
4066 $        1 0 0
4067 $        2 0 3     P0
4068 $       -------
4069 $        4 5 6     P1
4070 $
4071 $     Process0 [P0]: rows_owned=[0,1]
4072 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4073 $        j =  {0,0,2}  [size = 3]
4074 $        v =  {1,2,3}  [size = 3]
4075 $
4076 $     Process1 [P1]: rows_owned=[2]
4077 $        i =  {0,3}    [size = nrow+1  = 1+1]
4078 $        j =  {0,1,2}  [size = 3]
4079 $        v =  {4,5,6}  [size = 3]
4080 
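     A minimal sketch of the call made by process 0 for the layout above (process 1 passes its own m and arrays; error checking omitted, and the variable names are only for this example):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
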
4081 .keywords: matrix, aij, compressed row, sparse, parallel
4082 
4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4084           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4085 @*/
4086 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4087 {
4088   PetscErrorCode ierr;
4089 
4090   PetscFunctionBegin;
4091   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4092   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4093   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4094   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4095   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4096   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4097   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4098   PetscFunctionReturn(0);
4099 }
4100 
4101 /*@C
4102    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4103    (the default parallel PETSc format).  For good matrix assembly performance
4104    the user should preallocate the matrix storage by setting the parameters
4105    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4106    performance can be increased by more than a factor of 50.
4107 
4108    Collective on MPI_Comm
4109 
4110    Input Parameters:
4111 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4120 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4121            (same value is used for all local rows)
4122 .  d_nnz - array containing the number of nonzeros in the various rows of the
4123            DIAGONAL portion of the local submatrix (possibly different for each row)
4124            or NULL, if d_nz is used to specify the nonzero structure.
4125            The size of this array is equal to the number of local rows, i.e 'm'.
4126 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4127            submatrix (same value is used for all local rows).
4128 -  o_nnz - array containing the number of nonzeros in the various rows of the
4129            OFF-DIAGONAL portion of the local submatrix (possibly different for
4130            each row) or NULL, if o_nz is used to specify the nonzero
4131            structure. The size of this array is equal to the number
4132            of local rows, i.e 'm'.
4133 
4134    Output Parameter:
4135 .  A - the matrix
4136 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4140 
4141    Notes:
4142    If the *_nnz parameter is given then the *_nz parameter is ignored
4143 
4144    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4145    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4146    storage requirements for this matrix.
4147 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4151 
4152    The user MUST specify either the local or global matrix dimensions
4153    (possibly both).
4154 
   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4164 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows m and columns n owned by
   that processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4172 
4173    For a square global matrix we define each processor's diagonal portion
4174    to be its local rows and the corresponding columns (a square submatrix);
4175    each processor's off-diagonal portion encompasses the remainder of the
4176    local matrix (a rectangular submatrix).
4177 
4178    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4179 
4180    When calling this routine with a single process communicator, a matrix of
4181    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4182    type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve
4191 
4192    By default, this format uses inodes (identical nodes) when possible.
4193    We search for consecutive rows with the same nonzero structure, thereby
4194    reusing matrix information to achieve increased efficiency.
4195 
4196    Options Database Keys:
4197 +  -mat_no_inode  - Do not use inodes
4198 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4199 -  -mat_aij_oneindex - Internally use indexing starting at 1
4200         rather than 0.  Note that when calling MatSetValues(),
4201         the user still MUST index entries starting at 0!
4202 
4203 
4204    Example usage:
4205 
   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:
4210 
4211 .vb
4212             1  2  0  |  0  3  0  |  0  4
4213     Proc0   0  5  6  |  7  0  0  |  8  0
4214             9  0 10  | 11  0  0  | 12  0
4215     -------------------------------------
4216            13  0 14  | 15 16 17  |  0  0
4217     Proc1   0 18  0  | 19 20 21  |  0  0
4218             0  0  0  | 22 23  0  | 24  0
4219     -------------------------------------
4220     Proc2  25 26 27  |  0  0 28  | 29  0
4221            30  0  0  | 31 32 33  |  0 34
4222 .ve
4223 
4224    This can be represented as a collection of submatrices as
4225 
4226 .vb
4227       A B C
4228       D E F
4229       G H I
4230 .ve
4231 
   Here the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.
4234 
4235    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4236    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4237    The 'M','N' parameters are 8,8, and have the same values on all procs.
4238 
4239    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4240    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4241    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4245 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
4261 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
4265 .vb
4266      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4267      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4268      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4269 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the pre-allocation is perfect.
4272 
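   As a minimal sketch (not a complete program; error checking omitted, and the variable names are only for this example), proc0 above could create the matrix with a single call; the other processes pass their own local sizes and arrays:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* proc0 values from the table above */

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
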
4273    Level: intermediate
4274 
4275 .keywords: matrix, aij, compressed row, sparse, parallel
4276 
4277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4278           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4279 @*/
4280 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4281 {
4282   PetscErrorCode ierr;
4283   PetscMPIInt    size;
4284 
4285   PetscFunctionBegin;
4286   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4287   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4288   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4289   if (size > 1) {
4290     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4291     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4292   } else {
4293     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4294     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4295   }
4296   PetscFunctionReturn(0);
4297 }
4298 
4299 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4300 {
4301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4302   PetscBool      flg;
4303   PetscErrorCode ierr;
4304 
4305   PetscFunctionBegin;
4306   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4307   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4308   if (Ad)     *Ad     = a->A;
4309   if (Ao)     *Ao     = a->B;
4310   if (colmap) *colmap = a->garray;
4311   PetscFunctionReturn(0);
4312 }
4313 
4314 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4315 {
4316   PetscErrorCode ierr;
4317   PetscInt       m,N,i,rstart,nnz,Ii;
4318   PetscInt       *indx;
4319   PetscScalar    *values;
4320 
4321   PetscFunctionBegin;
4322   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4323   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4324     PetscInt       *dnz,*onz,sum,bs,cbs;
4325 
4326     if (n == PETSC_DECIDE) {
4327       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4328     }
4329     /* Check sum(n) = N */
4330     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4331     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4332 
4333     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4334     rstart -= m;
4335 
4336     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4337     for (i=0; i<m; i++) {
4338       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4339       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4340       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4341     }
4342 
4343     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4344     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4345     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4346     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4347     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4348     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4349     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4350     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4351   }
4352 
4353   /* numeric phase */
4354   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4355   for (i=0; i<m; i++) {
4356     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4357     Ii   = i + rstart;
4358     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4359     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4360   }
4361   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4362   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4367 {
4368   PetscErrorCode    ierr;
4369   PetscMPIInt       rank;
4370   PetscInt          m,N,i,rstart,nnz;
4371   size_t            len;
4372   const PetscInt    *indx;
4373   PetscViewer       out;
4374   char              *name;
4375   Mat               B;
4376   const PetscScalar *values;
4377 
4378   PetscFunctionBegin;
4379   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4380   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4381   /* Should this be the type of the diagonal block of A? */
4382   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4383   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4384   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4385   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4386   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4387   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4388   for (i=0; i<m; i++) {
4389     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4390     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4391     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4392   }
4393   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4394   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4395 
4396   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4397   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4398   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4399   sprintf(name,"%s.%d",outfile,rank);
4400   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4401   ierr = PetscFree(name);CHKERRQ(ierr);
4402   ierr = MatView(B,out);CHKERRQ(ierr);
4403   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4404   ierr = MatDestroy(&B);CHKERRQ(ierr);
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4409 {
4410   PetscErrorCode      ierr;
4411   Mat_Merge_SeqsToMPI *merge;
4412   PetscContainer      container;
4413 
4414   PetscFunctionBegin;
4415   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4416   if (container) {
4417     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4430     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4431     ierr = PetscFree(merge);CHKERRQ(ierr);
4432     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4433   }
4434   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 #include <../src/mat/utils/freespace.h>
4439 #include <petscbt.h>
4440 
4441 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4442 {
4443   PetscErrorCode      ierr;
4444   MPI_Comm            comm;
4445   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4446   PetscMPIInt         size,rank,taga,*len_s;
4447   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4448   PetscInt            proc,m;
4449   PetscInt            **buf_ri,**buf_rj;
4450   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4451   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4452   MPI_Request         *s_waits,*r_waits;
4453   MPI_Status          *status;
4454   MatScalar           *aa=a->a;
4455   MatScalar           **abuf_r,*ba_i;
4456   Mat_Merge_SeqsToMPI *merge;
4457   PetscContainer      container;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4461   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4462 
4463   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4464   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4465 
4466   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4467   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4468 
4469   bi     = merge->bi;
4470   bj     = merge->bj;
4471   buf_ri = merge->buf_ri;
4472   buf_rj = merge->buf_rj;
4473 
4474   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4475   owners = merge->rowmap->range;
4476   len_s  = merge->len_s;
4477 
4478   /* send and recv matrix values */
4479   /*-----------------------------*/
4480   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4481   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4482 
4483   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4484   for (proc=0,k=0; proc<size; proc++) {
4485     if (!len_s[proc]) continue;
4486     i    = owners[proc];
4487     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4488     k++;
4489   }
4490 
4491   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4492   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4493   ierr = PetscFree(status);CHKERRQ(ierr);
4494 
4495   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4496   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4497 
4498   /* insert mat values of mpimat */
4499   /*----------------------------*/
4500   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4501   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4502 
4503   for (k=0; k<merge->nrecv; k++) {
4504     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4505     nrows       = *(buf_ri_k[k]);
4506     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4508   }
4509 
4510   /* set values of ba */
4511   m = merge->rowmap->n;
4512   for (i=0; i<m; i++) {
4513     arow = owners[rank] + i;
4514     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4515     bnzi = bi[i+1] - bi[i];
4516     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4517 
4518     /* add local non-zero vals of this proc's seqmat into ba */
4519     anzi   = ai[arow+1] - ai[arow];
4520     aj     = a->j + ai[arow];
4521     aa     = a->a + ai[arow];
4522     nextaj = 0;
4523     for (j=0; nextaj<anzi; j++) {
4524       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4525         ba_i[j] += aa[nextaj++];
4526       }
4527     }
4528 
4529     /* add received vals into ba */
4530     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4531       /* i-th row */
4532       if (i == *nextrow[k]) {
4533         anzi   = *(nextai[k]+1) - *nextai[k];
4534         aj     = buf_rj[k] + *(nextai[k]);
4535         aa     = abuf_r[k] + *(nextai[k]);
4536         nextaj = 0;
4537         for (j=0; nextaj<anzi; j++) {
4538           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4539             ba_i[j] += aa[nextaj++];
4540           }
4541         }
4542         nextrow[k]++; nextai[k]++;
4543       }
4544     }
4545     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4546   }
4547   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4548   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4549 
4550   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4551   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4552   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4553   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4554   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4555   PetscFunctionReturn(0);
4556 }
4557 
4558 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4559 {
4560   PetscErrorCode      ierr;
4561   Mat                 B_mpi;
4562   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4563   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4564   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4565   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4566   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4567   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4568   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4569   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4570   MPI_Status          *status;
4571   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4572   PetscBT             lnkbt;
4573   Mat_Merge_SeqsToMPI *merge;
4574   PetscContainer      container;
4575 
4576   PetscFunctionBegin;
4577   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4578 
4579   /* make sure it is a PETSc comm */
4580   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4582   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4583 
4584   ierr = PetscNew(&merge);CHKERRQ(ierr);
4585   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4586 
4587   /* determine row ownership */
4588   /*---------------------------------------------------------*/
4589   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4590   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4591   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4592   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4593   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4594   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4595   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4596 
4597   m      = merge->rowmap->n;
4598   owners = merge->rowmap->range;
4599 
4600   /* determine the number of messages to send, their lengths */
4601   /*---------------------------------------------------------*/
4602   len_s = merge->len_s;
4603 
4604   len          = 0; /* length of buf_si[] */
4605   merge->nsend = 0;
4606   for (proc=0; proc<size; proc++) {
4607     len_si[proc] = 0;
4608     if (proc == rank) {
4609       len_s[proc] = 0;
4610     } else {
4611       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4612       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4613     }
4614     if (len_s[proc]) {
4615       merge->nsend++;
4616       nrows = 0;
4617       for (i=owners[proc]; i<owners[proc+1]; i++) {
4618         if (ai[i+1] > ai[i]) nrows++;
4619       }
4620       len_si[proc] = 2*(nrows+1);
4621       len         += len_si[proc];
4622     }
4623   }
4624 
4625   /* determine the number and length of messages to receive for ij-structure */
4626   /*-------------------------------------------------------------------------*/
4627   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4628   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4629 
4630   /* post the Irecv of j-structure */
4631   /*-------------------------------*/
4632   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4633   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4634 
4635   /* post the Isend of j-structure */
4636   /*--------------------------------*/
4637   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4638 
4639   for (proc=0, k=0; proc<size; proc++) {
4640     if (!len_s[proc]) continue;
4641     i    = owners[proc];
4642     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4643     k++;
4644   }
4645 
4646   /* receives and sends of j-structure are complete */
4647   /*------------------------------------------------*/
4648   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4649   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4650 
4651   /* send and recv i-structure */
4652   /*---------------------------*/
4653   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4654   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4655 
4656   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4657   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4658   for (proc=0,k=0; proc<size; proc++) {
4659     if (!len_s[proc]) continue;
4660     /* form outgoing message for i-structure:
4661          buf_si[0]:                 nrows to be sent
4662                [1:nrows]:           row index (global)
4663                [nrows+1:2*nrows+1]: i-structure index
4664     */
4665     /*-------------------------------------------*/
4666     nrows       = len_si[proc]/2 - 1;
4667     buf_si_i    = buf_si + nrows+1;
4668     buf_si[0]   = nrows;
4669     buf_si_i[0] = 0;
4670     nrows       = 0;
4671     for (i=owners[proc]; i<owners[proc+1]; i++) {
4672       anzi = ai[i+1] - ai[i];
4673       if (anzi) {
4674         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4675         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4676         nrows++;
4677       }
4678     }
4679     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4680     k++;
4681     buf_si += len_si[proc];
4682   }
4683 
4684   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4685   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4686 
4687   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4688   for (i=0; i<merge->nrecv; i++) {
4689     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4690   }
4691 
4692   ierr = PetscFree(len_si);CHKERRQ(ierr);
4693   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4694   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4695   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4696   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4697   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4698   ierr = PetscFree(status);CHKERRQ(ierr);
4699 
4700   /* compute a local seq matrix in each processor */
4701   /*----------------------------------------------*/
4702   /* allocate bi array and free space for accumulating nonzero column info */
4703   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4704   bi[0] = 0;
4705 
4706   /* create and initialize a linked list */
4707   nlnk = N+1;
4708   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4709 
4710   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4711   len  = ai[owners[rank+1]] - ai[owners[rank]];
4712   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4713 
4714   current_space = free_space;
4715 
4716   /* determine symbolic info for each local row */
4717   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4718 
4719   for (k=0; k<merge->nrecv; k++) {
4720     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4721     nrows       = *buf_ri_k[k];
4722     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4724   }
4725 
4726   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4727   len  = 0;
4728   for (i=0; i<m; i++) {
4729     bnzi = 0;
4730     /* add local non-zero cols of this proc's seqmat into lnk */
4731     arow  = owners[rank] + i;
4732     anzi  = ai[arow+1] - ai[arow];
4733     aj    = a->j + ai[arow];
4734     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4735     bnzi += nlnk;
4736     /* add received col data into lnk */
4737     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4738       if (i == *nextrow[k]) { /* i-th row */
4739         anzi  = *(nextai[k]+1) - *nextai[k];
4740         aj    = buf_rj[k] + *nextai[k];
4741         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4742         bnzi += nlnk;
4743         nextrow[k]++; nextai[k]++;
4744       }
4745     }
4746     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4747 
4748     /* if free space is not available, make more free space */
4749     if (current_space->local_remaining<bnzi) {
4750       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4751       nspacedouble++;
4752     }
4753     /* copy data into free space, then initialize lnk */
4754     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4755     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4756 
4757     current_space->array           += bnzi;
4758     current_space->local_used      += bnzi;
4759     current_space->local_remaining -= bnzi;
4760 
4761     bi[i+1] = bi[i] + bnzi;
4762   }
4763 
4764   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4765 
4766   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4767   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4768   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4769 
4770   /* create symbolic parallel matrix B_mpi */
4771   /*---------------------------------------*/
4772   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4773   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4774   if (n==PETSC_DECIDE) {
4775     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4776   } else {
4777     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4778   }
4779   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4780   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4781   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4782   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4783   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4784 
4785   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4786   B_mpi->assembled    = PETSC_FALSE;
4787   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4788   merge->bi           = bi;
4789   merge->bj           = bj;
4790   merge->buf_ri       = buf_ri;
4791   merge->buf_rj       = buf_rj;
4792   merge->coi          = NULL;
4793   merge->coj          = NULL;
4794   merge->owners_co    = NULL;
4795 
4796   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4797 
4798   /* attach the supporting struct to B_mpi for reuse */
4799   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4800   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4801   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4802   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4803   *mpimat = B_mpi;
4804 
4805   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4806   PetscFunctionReturn(0);
4807 }
4808 
4809 /*@C
4810       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4811                  matrices from each processor
4812 
4813     Collective on MPI_Comm
4814 
4815    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4818 .    m - number of local rows (or PETSC_DECIDE)
4819 .    n - number of local columns (or PETSC_DECIDE)
4820 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4821 
4822    Output Parameter:
4823 .    mpimat - the parallel matrix generated
4824 
4825     Level: advanced
4826 
4827    Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
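
     A minimal usage sketch (assuming seqmat is an assembled SeqAIJ matrix of identical size on every process; error checking omitted):
.vb
     Mat mpimat;

     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... after changing the values (but not the nonzero pattern) of seqmat ... */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve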
4831 @*/
4832 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4833 {
4834   PetscErrorCode ierr;
4835   PetscMPIInt    size;
4836 
4837   PetscFunctionBegin;
4838   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4839   if (size == 1) {
4840     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4841     if (scall == MAT_INITIAL_MATRIX) {
4842       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4843     } else {
4844       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4845     }
4846     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4847     PetscFunctionReturn(0);
4848   }
4849   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4850   if (scall == MAT_INITIAL_MATRIX) {
4851     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4852   }
4853   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4854   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4855   PetscFunctionReturn(0);
4856 }
4857 
4858 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize().
4862 
4863     Not Collective
4864 
4865    Input Parameters:
4866 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4868 
4869    Output Parameter:
4870 .    A_loc - the local sequential matrix generated
4871 
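    A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix; error checking omitted):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc; after the values of A change, refresh it with ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);   /* assumed here: the caller destroys the generated matrix when done */
.ve
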
4872     Level: developer
4873 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4875 
4876 @*/
4877 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4878 {
4879   PetscErrorCode ierr;
4880   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4881   Mat_SeqAIJ     *mat,*a,*b;
4882   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4883   MatScalar      *aa,*ba,*cam;
4884   PetscScalar    *ca;
4885   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4886   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4887   PetscBool      match;
4888   MPI_Comm       comm;
4889   PetscMPIInt    size;
4890 
4891   PetscFunctionBegin;
4892   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4893   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4894   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4895   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4896   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4897 
4898   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4899   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4900   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4901   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4902   aa = a->a; ba = b->a;
4903   if (scall == MAT_INITIAL_MATRIX) {
4904     if (size == 1) {
4905       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4906       PetscFunctionReturn(0);
4907     }
4908 
4909     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4910     ci[0] = 0;
4911     for (i=0; i<am; i++) {
4912       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4913     }
4914     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4915     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4916     k    = 0;
4917     for (i=0; i<am; i++) {
4918       ncols_o = bi[i+1] - bi[i];
4919       ncols_d = ai[i+1] - ai[i];
4920       /* off-diagonal portion of A */
4921       for (jo=0; jo<ncols_o; jo++) {
4922         col = cmap[*bj];
4923         if (col >= cstart) break;
4924         cj[k]   = col; bj++;
4925         ca[k++] = *ba++;
4926       }
4927       /* diagonal portion of A */
4928       for (j=0; j<ncols_d; j++) {
4929         cj[k]   = cstart + *aj++;
4930         ca[k++] = *aa++;
4931       }
4932       /* off-diagonal portion of A */
4933       for (j=jo; j<ncols_o; j++) {
4934         cj[k]   = cmap[*bj++];
4935         ca[k++] = *ba++;
4936       }
4937     }
4938     /* put together the new matrix */
4939     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4940     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4941     /* Since these are PETSc arrays, change flags to free them as necessary. */
4942     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4943     mat->free_a  = PETSC_TRUE;
4944     mat->free_ij = PETSC_TRUE;
4945     mat->nonew   = 0;
4946   } else if (scall == MAT_REUSE_MATRIX) {
4947     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4948     ci = mat->i; cj = mat->j; cam = mat->a;
4949     for (i=0; i<am; i++) {
4950       /* off-diagonal portion of A */
4951       ncols_o = bi[i+1] - bi[i];
4952       for (jo=0; jo<ncols_o; jo++) {
4953         col = cmap[*bj];
4954         if (col >= cstart) break;
4955         *cam++ = *ba++; bj++;
4956       }
4957       /* diagonal portion of A */
4958       ncols_d = ai[i+1] - ai[i];
4959       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4960       /* off-diagonal portion of A */
4961       for (j=jo; j<ncols_o; j++) {
4962         *cam++ = *ba++; bj++;
4963       }
4964     }
4965   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4966   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4967   PetscFunctionReturn(0);
4968 }
4969 
4970 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4972 
4973     Not Collective
4974 
4975    Input Parameters:
4976 +    A - the matrix
4977 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4978 -    row, col - index sets of rows and columns to extract (or NULL)
4979 
4980    Output Parameter:
4981 .    A_loc - the local sequential matrix generated
4982 
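    A minimal usage sketch (passing NULL for row and col so that all local rows and all nonzero columns are taken; error checking omitted):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
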
4983     Level: developer
4984 
4985 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4986 
4987 @*/
4988 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4989 {
4990   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4991   PetscErrorCode ierr;
4992   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4993   IS             isrowa,iscola;
4994   Mat            *aloc;
4995   PetscBool      match;
4996 
4997   PetscFunctionBegin;
4998   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4999   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5000   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5001   if (!row) {
5002     start = A->rmap->rstart; end = A->rmap->rend;
5003     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5004   } else {
5005     isrowa = *row;
5006   }
5007   if (!col) {
5008     start = A->cmap->rstart;
5009     cmap  = a->garray;
5010     nzA   = a->A->cmap->n;
5011     nzB   = a->B->cmap->n;
5012     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5013     ncols = 0;
5014     for (i=0; i<nzB; i++) {
5015       if (cmap[i] < start) idx[ncols++] = cmap[i];
5016       else break;
5017     }
5018     imark = i;
5019     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5020     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5021     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5022   } else {
5023     iscola = *col;
5024   }
5025   if (scall != MAT_INITIAL_MATRIX) {
5026     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5027     aloc[0] = *A_loc;
5028   }
5029   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5030   *A_loc = aloc[0];
5031   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5032   if (!row) {
5033     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5034   }
5035   if (!col) {
5036     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5037   }
5038   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5039   PetscFunctionReturn(0);
5040 }
5041 
5042 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5044 
5045     Collective on Mat
5046 
5047    Input Parameters:
5048 +    A,B - the matrices in mpiaij format
5049 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5050 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5051 
5052    Output Parameter:
5053 +    rowb, colb - index sets of rows and columns of B to extract
5054 -    B_seq - the sequential matrix generated
5055 
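    A minimal usage sketch (the index sets created by the first call are passed back in when reusing; error checking omitted):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;

     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... after the values of B change ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
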
5056     Level: developer
5057 
5058 @*/
5059 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5060 {
5061   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5062   PetscErrorCode ierr;
5063   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5064   IS             isrowb,iscolb;
5065   Mat            *bseq=NULL;
5066 
5067   PetscFunctionBegin;
5068   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5069     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5070   }
5071   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5072 
5073   if (scall == MAT_INITIAL_MATRIX) {
5074     start = A->cmap->rstart;
5075     cmap  = a->garray;
5076     nzA   = a->A->cmap->n;
5077     nzB   = a->B->cmap->n;
5078     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5079     ncols = 0;
5080     for (i=0; i<nzB; i++) {  /* row < local row index */
5081       if (cmap[i] < start) idx[ncols++] = cmap[i];
5082       else break;
5083     }
5084     imark = i;
5085     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5086     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above the local row range */
5087     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5088     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5089   } else {
5090     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5091     isrowb  = *rowb; iscolb = *colb;
5092     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5093     bseq[0] = *B_seq;
5094   }
5095   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5096   *B_seq = bseq[0];
5097   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5098   if (!rowb) {
5099     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5100   } else {
5101     *rowb = isrowb;
5102   }
5103   if (!colb) {
5104     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5105   } else {
5106     *colb = iscolb;
5107   }
5108   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5109   PetscFunctionReturn(0);
5110 }
5111 
5112 /*
5113     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5114     of the OFF-DIAGONAL portion of local A
5115 
5116     Collective on Mat
5117 
5118    Input Parameters:
5119 +    A,B - the matrices in mpiaij format
5120 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5121 
5122    Output Parameter:
5123 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5124 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5125 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5126 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5127 
5128     Level: developer
5129 
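   A minimal calling sketch (illustrative only; the variable names are placeholders, and the
   cleanup mirrors how the arrays are allocated below: startsj_s/startsj_r with PetscMalloc2()
   and bufa with PetscMalloc1()):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;

      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... B's numerical values change, its nonzero pattern does not ... */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);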
5130 */
5131 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5132 {
5133   VecScatter_MPI_General *gen_to,*gen_from;
5134   PetscErrorCode         ierr;
5135   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5136   Mat_SeqAIJ             *b_oth;
5137   VecScatter             ctx;
5138   MPI_Comm               comm;
5139   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5140   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5141   PetscInt               *rvalues,*svalues;
5142   MatScalar              *b_otha,*bufa,*bufA;
5143   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5144   MPI_Request            *rwaits = NULL,*swaits = NULL;
5145   MPI_Status             *sstatus,rstatus;
5146   PetscMPIInt            jj,size;
5147   PetscInt               *cols,sbs,rbs;
5148   PetscScalar            *vals;
5149 
5150   PetscFunctionBegin;
5151   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5152   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5153 
5154   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5155     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5156   }
5157   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5158   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5159 
5160   if (size == 1) {
5161     if (startsj_s) *startsj_s = NULL;
5162     if (bufa_ptr)  *bufa_ptr  = NULL;
5163     *B_oth    = NULL;
5164     PetscFunctionReturn(0);
5165   }
5166 
5167   if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */
5168     a->Mvctx_mpi1_flg = PETSC_TRUE;
5169     ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5170   }
5171   ctx = a->Mvctx_mpi1;
5172   tag = ((PetscObject)ctx)->tag;
5173 
5174   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5175   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5176   nrecvs   = gen_from->n;
5177   nsends   = gen_to->n;
5178 
5179   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5180   srow    = gen_to->indices;    /* local row index to be sent */
5181   sstarts = gen_to->starts;
5182   sprocs  = gen_to->procs;
5183   sstatus = gen_to->sstatus;
5184   sbs     = gen_to->bs;
5185   rstarts = gen_from->starts;
5186   rprocs  = gen_from->procs;
5187   rbs     = gen_from->bs;
5188 
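  /* without caller-supplied buffers there is nothing to reuse, so rebuild everything */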
5189   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5190   if (scall == MAT_INITIAL_MATRIX) {
5191     /* i-array */
5192     /*---------*/
5193     /*  post receives */
5194     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5195     for (i=0; i<nrecvs; i++) {
5196       rowlen = rvalues + rstarts[i]*rbs;
5197       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5198       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5199     }
5200 
5201     /* pack the outgoing message */
5202     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5203 
5204     sstartsj[0] = 0;
5205     rstartsj[0] = 0;
5206     len         = 0; /* total length of j or a array to be sent */
5207     k           = 0;
5208     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5209     for (i=0; i<nsends; i++) {
5210       rowlen = svalues + sstarts[i]*sbs;
5211       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5212       for (j=0; j<nrows; j++) {
5213         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5214         for (l=0; l<sbs; l++) {
5215           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5216 
5217           rowlen[j*sbs+l] = ncols;
5218 
5219           len += ncols;
5220           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5221         }
5222         k++;
5223       }
5224       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5225 
5226       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5227     }
5228     /* recvs and sends of i-array are completed */
5229     i = nrecvs;
5230     while (i--) {
5231       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5232     }
5233     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5234     ierr = PetscFree(svalues);CHKERRQ(ierr);
5235 
5236     /* allocate buffers for sending j and a arrays */
5237     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5238     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5239 
5240     /* create i-array of B_oth */
5241     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5242 
5243     b_othi[0] = 0;
5244     len       = 0; /* total length of j or a array to be received */
5245     k         = 0;
5246     for (i=0; i<nrecvs; i++) {
5247       rowlen = rvalues + rstarts[i]*rbs;
5248       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5249       for (j=0; j<nrows; j++) {
5250         b_othi[k+1] = b_othi[k] + rowlen[j];
5251         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5252         k++;
5253       }
5254       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5255     }
5256     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5257 
5258     /* allocate space for j and a arrays of B_oth */
5259     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5260     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5261 
5262     /* j-array */
5263     /*---------*/
5264     /*  post receives of j-array */
5265     for (i=0; i<nrecvs; i++) {
5266       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5267       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5268     }
5269 
5270     /* pack the outgoing message j-array */
5271     k = 0;
5272     for (i=0; i<nsends; i++) {
5273       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5274       bufJ  = bufj+sstartsj[i];
5275       for (j=0; j<nrows; j++) {
5276         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5277         for (ll=0; ll<sbs; ll++) {
5278           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5279           for (l=0; l<ncols; l++) {
5280             *bufJ++ = cols[l];
5281           }
5282           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5283         }
5284       }
5285       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5286     }
5287 
5288     /* recvs and sends of j-array are completed */
5289     i = nrecvs;
5290     while (i--) {
5291       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5292     }
5293     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5294   } else if (scall == MAT_REUSE_MATRIX) {
5295     sstartsj = *startsj_s;
5296     rstartsj = *startsj_r;
5297     bufa     = *bufa_ptr;
5298     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5299     b_otha   = b_oth->a;
5300   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5301 
5302   /* a-array */
5303   /*---------*/
5304   /*  post receives of a-array */
5305   for (i=0; i<nrecvs; i++) {
5306     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5307     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5308   }
5309 
5310   /* pack the outgoing message a-array */
5311   k = 0;
5312   for (i=0; i<nsends; i++) {
5313     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5314     bufA  = bufa+sstartsj[i];
5315     for (j=0; j<nrows; j++) {
5316       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5317       for (ll=0; ll<sbs; ll++) {
5318         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5319         for (l=0; l<ncols; l++) {
5320           *bufA++ = vals[l];
5321         }
5322         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5323       }
5324     }
5325     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5326   }
5327   /* recvs and sends of a-array are completed */
5328   i = nrecvs;
5329   while (i--) {
5330     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5331   }
5332   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5333   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5334 
5335   if (scall == MAT_INITIAL_MATRIX) {
5336     /* put together the new matrix */
5337     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5338 
5339     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5340     /* Since these are PETSc arrays, change flags to free them as necessary. */
5341     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5342     b_oth->free_a  = PETSC_TRUE;
5343     b_oth->free_ij = PETSC_TRUE;
5344     b_oth->nonew   = 0;
5345 
5346     ierr = PetscFree(bufj);CHKERRQ(ierr);
5347     if (!startsj_s || !bufa_ptr) {
5348       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5349       ierr = PetscFree(bufa);CHKERRQ(ierr);
5350     } else {
5351       *startsj_s = sstartsj;
5352       *startsj_r = rstartsj;
5353       *bufa_ptr  = bufa;
5354     }
5355   }
5356   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5357   PetscFunctionReturn(0);
5358 }
5359 
5360 /*@C
5361   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5362 
5363   Not Collective
5364 
5365   Input Parameters:
5366 . A - The matrix in mpiaij format
5367 
5368   Output Parameter:
5369 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5370 . colmap - A map from global column index to local index into lvec
5371 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5372 
5373   Level: developer
5374 
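  Example usage (a minimal sketch; the variable names are placeholders and the returned objects
  are owned by the matrix, so the caller must not destroy them):

.vb
  Vec        lvec;
  VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve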
5375 @*/
5376 #if defined(PETSC_USE_CTABLE)
5377 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5378 #else
5379 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5380 #endif
5381 {
5382   Mat_MPIAIJ *a;
5383 
5384   PetscFunctionBegin;
5385   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5386   PetscValidPointer(lvec, 2);
5387   PetscValidPointer(colmap, 3);
5388   PetscValidPointer(multScatter, 4);
5389   a = (Mat_MPIAIJ*) A->data;
5390   if (lvec) *lvec = a->lvec;
5391   if (colmap) *colmap = a->colmap;
5392   if (multScatter) *multScatter = a->Mvctx;
5393   PetscFunctionReturn(0);
5394 }
5395 
5396 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5397 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5398 #if defined(PETSC_HAVE_MKL_SPARSE)
5399 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5400 #endif
5401 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5402 #if defined(PETSC_HAVE_ELEMENTAL)
5403 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5404 #endif
5405 #if defined(PETSC_HAVE_HYPRE)
5406 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5407 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5408 #endif
5409 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5410 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5411 
5412 /*
5413     Computes C = A*B as (B'*A')', since computing the product directly with A dense and B sparse is untenable
5414 
5415                n                       p                          p
5416         (              )       (              )         (                  )
5417       m (      A       )  *  n (       B      )   =   m (         C        )
5418         (              )       (              )         (                  )
5419 
5420 */
5421 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5422 {
5423   PetscErrorCode ierr;
5424   Mat            At,Bt,Ct;
5425 
5426   PetscFunctionBegin;
5427   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5428   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5429   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5430   ierr = MatDestroy(&At);CHKERRQ(ierr);
5431   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5432   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5433   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5434   PetscFunctionReturn(0);
5435 }
5436 
5437 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5438 {
5439   PetscErrorCode ierr;
5440   PetscInt       m=A->rmap->n,n=B->cmap->n;
5441   Mat            Cmat;
5442 
5443   PetscFunctionBegin;
5444   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5445   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5446   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5447   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5448   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5449   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5450   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5451   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5452 
5453   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5454 
5455   *C = Cmat;
5456   PetscFunctionReturn(0);
5457 }
5458 
5459 /* ----------------------------------------------------------------*/
5460 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5461 {
5462   PetscErrorCode ierr;
5463 
5464   PetscFunctionBegin;
5465   if (scall == MAT_INITIAL_MATRIX) {
5466     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5467     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5468     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5469   }
5470   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5471   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5472   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5473   PetscFunctionReturn(0);
5474 }
5475 
5476 /*MC
5477    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5478 
5479    Options Database Keys:
5480 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5481 
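  Notes:
    A minimal creation sketch (illustrative only; comm, m, n, and the preallocation values are placeholders):

.vb
  Mat A;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
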
5482   Level: beginner
5483 
5484 .seealso: MatCreateAIJ()
5485 M*/
5486 
5487 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5488 {
5489   Mat_MPIAIJ     *b;
5490   PetscErrorCode ierr;
5491   PetscMPIInt    size;
5492 
5493   PetscFunctionBegin;
5494   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5495 
5496   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5497   B->data       = (void*)b;
5498   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5499   B->assembled  = PETSC_FALSE;
5500   B->insertmode = NOT_SET_VALUES;
5501   b->size       = size;
5502 
5503   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5504 
5505   /* build cache for off-processor entries formed during assembly */
5506   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5507 
5508   b->donotstash  = PETSC_FALSE;
5509   b->colmap      = 0;
5510   b->garray      = 0;
5511   b->roworiented = PETSC_TRUE;
5512 
5513   /* stuff used for matrix vector multiply */
5514   b->lvec  = NULL;
5515   b->Mvctx = NULL;
5516 
5517   /* stuff for MatGetRow() */
5518   b->rowindices   = 0;
5519   b->rowvalues    = 0;
5520   b->getrowactive = PETSC_FALSE;
5521 
5522   /* flexible pointer used in CUSP/CUSPARSE classes */
5523   b->spptr = NULL;
5524 
5525   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5534 #if defined(PETSC_HAVE_MKL_SPARSE)
5535   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5536 #endif
5537   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5539 #if defined(PETSC_HAVE_ELEMENTAL)
5540   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5541 #endif
5542 #if defined(PETSC_HAVE_HYPRE)
5543   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5544 #endif
5545   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5549   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5550 #if defined(PETSC_HAVE_HYPRE)
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5552 #endif
5553   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5554   PetscFunctionReturn(0);
5555 }
5556 
5557 /*@C
5558      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5559          and "off-diagonal" parts of the matrix in CSR format.
5560 
5561    Collective on MPI_Comm
5562 
5563    Input Parameters:
5564 +  comm - MPI communicator
5565 .  m - number of local rows (Cannot be PETSC_DECIDE)
5566 .  n - This value should be the same as the local size used in creating the
5567        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5568        calculated if N is given). For square matrices n is almost always m.
5569 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5570 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5571 .   i - row indices for "diagonal" portion of matrix
5572 .   j - column indices for "diagonal" portion of matrix
5573 .   a - matrix values for "diagonal" portion of matrix
5574 .   oi - row indices for "off-diagonal" portion of matrix
5575 .   oj - column indices for "off-diagonal" portion of matrix
5576 -   oa - matrix values for "off-diagonal" portion of matrix
5577 
5578    Output Parameter:
5579 .   mat - the matrix
5580 
5581    Level: advanced
5582 
5583    Notes:
5584        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5585        must free the arrays once the matrix has been destroyed and not before.
5586 
5587        The i and j indices are 0 based
5588 
5589        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5590 
5591        This sets local rows and cannot be used to set off-processor values.
5592 
5593        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5594        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5595        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5596        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5597        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5598        communication if it is known that only local entries will be set.
5599 
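       Example (a minimal sketch for the 4x4 matrix below on two processes, each owning two rows and
       two columns; as suggested by the implementation, j holds column indices local to the "diagonal"
       block while oj holds global column indices):

.vb
          1 2 0 0
          0 3 4 0
          0 5 6 0
          0 0 0 7

       rank 0 (rows 0-1, columns 0-1):
          i[]  = {0,2,3};  j[]  = {0,1,1};  a[]  = {1,2,3};
          oi[] = {0,0,1};  oj[] = {2};      oa[] = {4};
       rank 1 (rows 2-3, columns 2-3):
          i[]  = {0,1,2};  j[]  = {0,1};    a[]  = {6,7};
          oi[] = {0,1,1};  oj[] = {1};      oa[] = {5};

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
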
5600 .keywords: matrix, aij, compressed row, sparse, parallel
5601 
5602 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5603           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5604 @*/
5605 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5606 {
5607   PetscErrorCode ierr;
5608   Mat_MPIAIJ     *maij;
5609 
5610   PetscFunctionBegin;
5611   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5612   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5613   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5614   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5615   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5616   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5617   maij = (Mat_MPIAIJ*) (*mat)->data;
5618 
5619   (*mat)->preallocated = PETSC_TRUE;
5620 
5621   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5622   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5623 
5624   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5625   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5626 
5627   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5631 
5632   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5633   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5634   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5635   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5636   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5637   PetscFunctionReturn(0);
5638 }
5639 
5640 /*
5641     Special version for direct calls from Fortran
5642 */
5643 #include <petsc/private/fortranimpl.h>
5644 
5645 /* Change these macros so they can be used in a void function */
5646 #undef CHKERRQ
5647 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5648 #undef SETERRQ2
5649 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5650 #undef SETERRQ3
5651 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5652 #undef SETERRQ
5653 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5654 
5655 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5656 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5657 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5658 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5659 #else
5660 #endif
5661 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5662 {
5663   Mat            mat  = *mmat;
5664   PetscInt       m    = *mm, n = *mn;
5665   InsertMode     addv = *maddv;
5666   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5667   PetscScalar    value;
5668   PetscErrorCode ierr;
5669 
5670   MatCheckPreallocated(mat,1);
5671   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5672 
5673 #if defined(PETSC_USE_DEBUG)
5674   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5675 #endif
5676   {
5677     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5678     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5679     PetscBool roworiented = aij->roworiented;
5680 
5681     /* Some Variables required in the macro */
5682     Mat        A                 = aij->A;
5683     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5684     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5685     MatScalar  *aa               = a->a;
5686     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5687     Mat        B                 = aij->B;
5688     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5689     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5690     MatScalar  *ba               = b->a;
5691 
5692     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5693     PetscInt  nonew = a->nonew;
5694     MatScalar *ap1,*ap2;
5695 
5696     PetscFunctionBegin;
5697     for (i=0; i<m; i++) {
5698       if (im[i] < 0) continue;
5699 #if defined(PETSC_USE_DEBUG)
5700       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5701 #endif
5702       if (im[i] >= rstart && im[i] < rend) {
5703         row      = im[i] - rstart;
5704         lastcol1 = -1;
5705         rp1      = aj + ai[row];
5706         ap1      = aa + ai[row];
5707         rmax1    = aimax[row];
5708         nrow1    = ailen[row];
5709         low1     = 0;
5710         high1    = nrow1;
5711         lastcol2 = -1;
5712         rp2      = bj + bi[row];
5713         ap2      = ba + bi[row];
5714         rmax2    = bimax[row];
5715         nrow2    = bilen[row];
5716         low2     = 0;
5717         high2    = nrow2;
5718 
5719         for (j=0; j<n; j++) {
5720           if (roworiented) value = v[i*n+j];
5721           else value = v[i+j*m];
5722           if (in[j] >= cstart && in[j] < cend) {
5723             col = in[j] - cstart;
5724             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5725             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5726           } else if (in[j] < 0) continue;
5727 #if defined(PETSC_USE_DEBUG)
5728           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5729 #endif
5730           else {
5731             if (mat->was_assembled) {
5732               if (!aij->colmap) {
5733                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5734               }
5735 #if defined(PETSC_USE_CTABLE)
5736               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5737               col--;
5738 #else
5739               col = aij->colmap[in[j]] - 1;
5740 #endif
5741               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5742               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5743                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5744                 col  =  in[j];
5745                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5746                 B     = aij->B;
5747                 b     = (Mat_SeqAIJ*)B->data;
5748                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5749                 rp2   = bj + bi[row];
5750                 ap2   = ba + bi[row];
5751                 rmax2 = bimax[row];
5752                 nrow2 = bilen[row];
5753                 low2  = 0;
5754                 high2 = nrow2;
5755                 bm    = aij->B->rmap->n;
5756                 ba    = b->a;
5757               }
5758             } else col = in[j];
5759             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5760           }
5761         }
5762       } else if (!aij->donotstash) {
5763         if (roworiented) {
5764           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5765         } else {
5766           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5767         }
5768       }
5769     }
5770   }
5771   PetscFunctionReturnVoid();
5772 }
5773 
5774