xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 846b4da1c24d4646329ef843e1782566ebe4da91)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also
   automatically switches over to using inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
28 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array but is fast to acess.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
/*
   MatSetValues_SeqAIJ_A_Private - sets or adds one value at local (row,col) of the diagonal
   block A, working on the row cursor that the caller has loaded beforehand.

   The macro reads and updates variables of the enclosing function by name: rp1/ap1 (column and
   value arrays of the current row), nrow1 (current row length), rmax1, low1/high1/lastcol1
   (search window carried over from the previous insertion into the same row), the scratch
   integers t, _i, ii and N, and A, a, am, aa, ai, aj, aimax, ailen, nonew, ignorezeroentries.
   orow and ocol are only used in the error message.  Because of the label a_noinsert the macro
   can be expanded only once per function.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
  /* keep the part of the previous search window that can still contain col */ \
  if (col <= lastcol1) low1 = 0; \
  else high1 = nrow1; \
  lastcol1 = col; \
  /* bisect until the window holds at most five entries */ \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  /* scan to the first stored column that is not smaller than col */ \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] >= col) break; \
  } \
  if (_i < high1 && rp1[_i] == col) { \
    /* the location exists already: update it in place */ \
    if (addv == ADD_VALUES) ap1[_i] += value; \
    else ap1[_i] = value; \
    goto a_noinsert; \
  } \
  /* skipped: an ignored off-diagonal zero, or new nonzeros are silently dropped (nonew == 1) */ \
  if ((value == 0.0 && ignorezeroentries && row != col) || nonew == 1) { \
    low1 = 0; high1 = nrow1; \
    goto a_noinsert; \
  } \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1 - 1; \
  nrow1++; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  A->nonzerostate++; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}
460 
/*
   MatSetValues_SeqAIJ_B_Private - sets or adds one value at local (row,col) of the off-diagonal
   block B, working on the row cursor that the caller has loaded beforehand.

   The macro reads and updates variables of the enclosing function by name: rp2/ap2 (column and
   value arrays of the current row), nrow2 (current row length), rmax2, low2/high2/lastcol2
   (search window carried over from the previous insertion into the same row), the scratch
   integers t, _i, ii and N, and B, b, bm, ba, bi, bj, bimax, bilen, nonew, ignorezeroentries.
   orow and ocol are only used in the error message.  Because of the label b_noinsert the macro
   can be expanded only once per function.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
  /* keep the part of the previous search window that can still contain col */ \
  if (col <= lastcol2) low2 = 0; \
  else high2 = nrow2; \
  lastcol2 = col; \
  /* bisect until the window holds at most five entries */ \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  /* scan to the first stored column that is not smaller than col */ \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] >= col) break; \
  } \
  if (_i < high2 && rp2[_i] == col) { \
    /* the location exists already: update it in place */ \
    if (addv == ADD_VALUES) ap2[_i] += value; \
    else ap2[_i] = value; \
    goto b_noinsert; \
  } \
  /* skipped: an ignored zero, or new nonzeros are silently dropped (nonew == 1) */ \
  if ((value == 0.0 && ignorezeroentries) || nonew == 1) { \
    low2 = 0; high2 = nrow2; \
    goto b_noinsert; \
  } \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2 - 1; \
  nrow2++; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  B->nonzerostate++; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added in yy until the next line, */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   /* do nondiagonal part */
1048   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1049   /* send it on its way */
1050   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   /* do local part */
1052   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1053   /* receive remote parts */
1054   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /*
1059   This only works correctly for square matrices where the subblock A->A is the
1060    diagonal block
1061 */
1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1063 {
1064   PetscErrorCode ierr;
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066 
1067   PetscFunctionBegin;
1068   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1069   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1070   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1075 {
1076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1077   PetscErrorCode ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1081   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1086 {
1087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091 #if defined(PETSC_USE_LOG)
1092   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1093 #endif
1094   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1095   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1096   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1097   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1098 #if defined(PETSC_USE_CTABLE)
1099   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1100 #else
1101   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1102 #endif
1103   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1105   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1106   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1107   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1108   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1109 
1110   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1111   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1112   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1118   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1119 #if defined(PETSC_HAVE_ELEMENTAL)
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1121 #endif
1122 #if defined(PETSC_HAVE_HYPRE)
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1125 #endif
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1130 {
1131   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1132   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1133   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1134   PetscErrorCode ierr;
1135   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1136   int            fd;
1137   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1138   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1139   PetscScalar    *column_values;
1140   PetscInt       message_count,flowcontrolcount;
1141   FILE           *file;
1142 
1143   PetscFunctionBegin;
1144   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1145   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1146   nz   = A->nz + B->nz;
1147   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1148   if (!rank) {
1149     header[0] = MAT_FILE_CLASSID;
1150     header[1] = mat->rmap->N;
1151     header[2] = mat->cmap->N;
1152 
1153     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1154     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1155     /* get largest number of rows any processor has */
1156     rlen  = mat->rmap->n;
1157     range = mat->rmap->range;
1158     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1159   } else {
1160     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1161     rlen = mat->rmap->n;
1162   }
1163 
1164   /* load up the local row counts */
1165   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1166   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1167 
1168   /* store the row lengths to the file */
1169   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1170   if (!rank) {
1171     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1172     for (i=1; i<size; i++) {
1173       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1174       rlen = range[i+1] - range[i];
1175       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1177     }
1178     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1179   } else {
1180     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1181     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1182     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1183   }
1184   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1185 
1186   /* load up the local column indices */
1187   nzmax = nz; /* th processor needs space a largest processor needs */
1188   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1189   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1190   cnt   = 0;
1191   for (i=0; i<mat->rmap->n; i++) {
1192     for (j=B->i[i]; j<B->i[i+1]; j++) {
1193       if ((col = garray[B->j[j]]) > cstart) break;
1194       column_indices[cnt++] = col;
1195     }
1196     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1197     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1198   }
1199   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1200 
1201   /* store the column indices to the file */
1202   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1203   if (!rank) {
1204     MPI_Status status;
1205     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     for (i=1; i<size; i++) {
1207       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1208       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1209       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1210       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1212     }
1213     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1214   } else {
1215     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1216     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1219   }
1220   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1221 
1222   /* load up the local column values */
1223   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1224   cnt  = 0;
1225   for (i=0; i<mat->rmap->n; i++) {
1226     for (j=B->i[i]; j<B->i[i+1]; j++) {
1227       if (garray[B->j[j]] > cstart) break;
1228       column_values[cnt++] = B->a[j];
1229     }
1230     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1231     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1232   }
1233   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1234 
1235   /* store the column values to the file */
1236   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1237   if (!rank) {
1238     MPI_Status status;
1239     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1240     for (i=1; i<size; i++) {
1241       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1242       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1243       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1244       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1246     }
1247     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1248   } else {
1249     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1250     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(column_values);CHKERRQ(ierr);
1255 
1256   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1257   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #include <petscdraw.h>
1262 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1265   PetscErrorCode    ierr;
1266   PetscMPIInt       rank = aij->rank,size = aij->size;
1267   PetscBool         isdraw,iascii,isbinary;
1268   PetscViewer       sviewer;
1269   PetscViewerFormat format;
1270 
1271   PetscFunctionBegin;
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1274   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1275   if (iascii) {
1276     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1277     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1278       MatInfo   info;
1279       PetscBool inodes;
1280 
1281       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1282       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1283       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1284       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1285       if (!inodes) {
1286         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1287                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1288       } else {
1289         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1290                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1291       }
1292       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1294       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1295       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1296       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1298       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1299       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1300       PetscFunctionReturn(0);
1301     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1302       PetscInt inodecount,inodelimit,*inodes;
1303       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1304       if (inodes) {
1305         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1306       } else {
1307         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1308       }
1309       PetscFunctionReturn(0);
1310     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1311       PetscFunctionReturn(0);
1312     }
1313   } else if (isbinary) {
1314     if (size == 1) {
1315       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1316       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1317     } else {
1318       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1319     }
1320     PetscFunctionReturn(0);
1321   } else if (isdraw) {
1322     PetscDraw draw;
1323     PetscBool isnull;
1324     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1325     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1326     if (isnull) PetscFunctionReturn(0);
1327   }
1328 
1329   {
1330     /* assemble the entire matrix onto first processor. */
1331     Mat        A;
1332     Mat_SeqAIJ *Aloc;
1333     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1334     MatScalar  *a;
1335 
1336     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1337     if (!rank) {
1338       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1339     } else {
1340       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1341     }
1342     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1343     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1344     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1345     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1346     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1347 
1348     /* copy over the A part */
1349     Aloc = (Mat_SeqAIJ*)aij->A->data;
1350     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1351     row  = mat->rmap->rstart;
1352     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1353     for (i=0; i<m; i++) {
1354       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1355       row++;
1356       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1357     }
1358     aj = Aloc->j;
1359     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1360 
1361     /* copy over the B part */
1362     Aloc = (Mat_SeqAIJ*)aij->B->data;
1363     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1364     row  = mat->rmap->rstart;
1365     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1366     ct   = cols;
1367     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1368     for (i=0; i<m; i++) {
1369       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1370       row++;
1371       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1372     }
1373     ierr = PetscFree(ct);CHKERRQ(ierr);
1374     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376     /*
1377        Everyone has to call to draw the matrix since the graphics waits are
1378        synchronized across all processors that share the PetscDraw object
1379     */
1380     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1381     if (!rank) {
1382       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1384     }
1385     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1386     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1387     ierr = MatDestroy(&A);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1393 {
1394   PetscErrorCode ierr;
1395   PetscBool      iascii,isdraw,issocket,isbinary;
1396 
1397   PetscFunctionBegin;
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1402   if (iascii || isdraw || isbinary || issocket) {
1403     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1409 {
1410   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1411   PetscErrorCode ierr;
1412   Vec            bb1 = 0;
1413   PetscBool      hasop;
1414 
1415   PetscFunctionBegin;
1416   if (flag == SOR_APPLY_UPPER) {
1417     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1418     PetscFunctionReturn(0);
1419   }
1420 
1421   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1422     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1423   }
1424 
1425   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1426     if (flag & SOR_ZERO_INITIAL_GUESS) {
1427       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1428       its--;
1429     }
1430 
1431     while (its--) {
1432       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1433       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1434 
1435       /* update rhs: bb1 = bb - B*x */
1436       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1437       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1438 
1439       /* local sweep */
1440       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1441     }
1442   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1443     if (flag & SOR_ZERO_INITIAL_GUESS) {
1444       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1445       its--;
1446     }
1447     while (its--) {
1448       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1450 
1451       /* update rhs: bb1 = bb - B*x */
1452       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1453       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1454 
1455       /* local sweep */
1456       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1457     }
1458   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463     while (its--) {
1464       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1469       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1470 
1471       /* local sweep */
1472       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1473     }
1474   } else if (flag & SOR_EISENSTAT) {
1475     Vec xx1;
1476 
1477     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1478     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1479 
1480     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482     if (!mat->diag) {
1483       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1484       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1485     }
1486     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1487     if (hasop) {
1488       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1489     } else {
1490       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1491     }
1492     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1493 
1494     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1495 
1496     /* local sweep */
1497     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1498     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1499     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1500   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1501 
1502   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1503 
1504   matin->factorerrortype = mat->A->factorerrortype;
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1509 {
1510   Mat            aA,aB,Aperm;
1511   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1512   PetscScalar    *aa,*ba;
1513   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1514   PetscSF        rowsf,sf;
1515   IS             parcolp = NULL;
1516   PetscBool      done;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1521   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1522   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1523   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1524 
1525   /* Invert row permutation to find out where my rows should go */
1526   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1527   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1528   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1529   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1530   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1531   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1532 
1533   /* Invert column permutation to find out where my columns should go */
1534   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1535   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1536   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1537   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1538   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1540   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1541 
1542   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1543   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1544   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1545 
1546   /* Find out where my gcols should go */
1547   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1548   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1549   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1550   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1551   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1552   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1553   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1554   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1555 
1556   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1557   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1558   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1559   for (i=0; i<m; i++) {
1560     PetscInt row = rdest[i],rowner;
1561     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1562     for (j=ai[i]; j<ai[i+1]; j++) {
1563       PetscInt cowner,col = cdest[aj[j]];
1564       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1565       if (rowner == cowner) dnnz[i]++;
1566       else onnz[i]++;
1567     }
1568     for (j=bi[i]; j<bi[i+1]; j++) {
1569       PetscInt cowner,col = gcdest[bj[j]];
1570       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1571       if (rowner == cowner) dnnz[i]++;
1572       else onnz[i]++;
1573     }
1574   }
1575   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1576   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1577   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1578   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1579   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1580 
1581   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1582   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1583   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1584   for (i=0; i<m; i++) {
1585     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1586     PetscInt j0,rowlen;
1587     rowlen = ai[i+1] - ai[i];
1588     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1589       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1590       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1591     }
1592     rowlen = bi[i+1] - bi[i];
1593     for (j0=j=0; j<rowlen; j0=j) {
1594       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1595       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1596     }
1597   }
1598   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1600   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1601   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1602   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1603   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1604   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1605   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1606   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1607   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1608   *B = Aperm;
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1613 {
1614   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1615   PetscErrorCode ierr;
1616 
1617   PetscFunctionBegin;
1618   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1619   if (ghosts) *ghosts = aij->garray;
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1624 {
1625   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1626   Mat            A    = mat->A,B = mat->B;
1627   PetscErrorCode ierr;
1628   PetscReal      isend[5],irecv[5];
1629 
1630   PetscFunctionBegin;
1631   info->block_size = 1.0;
1632   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1633 
1634   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1635   isend[3] = info->memory;  isend[4] = info->mallocs;
1636 
1637   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1638 
1639   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1640   isend[3] += info->memory;  isend[4] += info->mallocs;
1641   if (flag == MAT_LOCAL) {
1642     info->nz_used      = isend[0];
1643     info->nz_allocated = isend[1];
1644     info->nz_unneeded  = isend[2];
1645     info->memory       = isend[3];
1646     info->mallocs      = isend[4];
1647   } else if (flag == MAT_GLOBAL_MAX) {
1648     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1649 
1650     info->nz_used      = irecv[0];
1651     info->nz_allocated = irecv[1];
1652     info->nz_unneeded  = irecv[2];
1653     info->memory       = irecv[3];
1654     info->mallocs      = irecv[4];
1655   } else if (flag == MAT_GLOBAL_SUM) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   }
1664   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1665   info->fill_ratio_needed = 0;
1666   info->factor_mallocs    = 0;
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1671 {
1672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   switch (op) {
1677   case MAT_NEW_NONZERO_LOCATIONS:
1678   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1679   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1680   case MAT_KEEP_NONZERO_PATTERN:
1681   case MAT_NEW_NONZERO_LOCATION_ERR:
1682   case MAT_USE_INODES:
1683   case MAT_IGNORE_ZERO_ENTRIES:
1684     MatCheckPreallocated(A,1);
1685     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1686     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1687     break;
1688   case MAT_ROW_ORIENTED:
1689     MatCheckPreallocated(A,1);
1690     a->roworiented = flg;
1691 
1692     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1693     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1694     break;
1695   case MAT_NEW_DIAGONALS:
1696     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1697     break;
1698   case MAT_IGNORE_OFF_PROC_ENTRIES:
1699     a->donotstash = flg;
1700     break;
1701   case MAT_SPD:
1702     A->spd_set = PETSC_TRUE;
1703     A->spd     = flg;
1704     if (flg) {
1705       A->symmetric                  = PETSC_TRUE;
1706       A->structurally_symmetric     = PETSC_TRUE;
1707       A->symmetric_set              = PETSC_TRUE;
1708       A->structurally_symmetric_set = PETSC_TRUE;
1709     }
1710     break;
1711   case MAT_SYMMETRIC:
1712     MatCheckPreallocated(A,1);
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     break;
1715   case MAT_STRUCTURALLY_SYMMETRIC:
1716     MatCheckPreallocated(A,1);
1717     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1718     break;
1719   case MAT_HERMITIAN:
1720     MatCheckPreallocated(A,1);
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     break;
1723   case MAT_SYMMETRY_ETERNAL:
1724     MatCheckPreallocated(A,1);
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_SUBMAT_SINGLEIS:
1728     A->submat_singleis = flg;
1729     break;
1730   case MAT_STRUCTURE_ONLY:
1731     /* The option is handled directly by MatSetOption() */
1732     break;
1733   default:
1734     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1735   }
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1740 {
1741   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1742   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1743   PetscErrorCode ierr;
1744   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1745   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1746   PetscInt       *cmap,*idx_p;
1747 
1748   PetscFunctionBegin;
1749   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1750   mat->getrowactive = PETSC_TRUE;
1751 
1752   if (!mat->rowvalues && (idx || v)) {
1753     /*
1754         allocate enough space to hold information from the longest row.
1755     */
1756     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1757     PetscInt   max = 1,tmp;
1758     for (i=0; i<matin->rmap->n; i++) {
1759       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1760       if (max < tmp) max = tmp;
1761     }
1762     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1763   }
1764 
1765   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1766   lrow = row - rstart;
1767 
1768   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1769   if (!v)   {pvA = 0; pvB = 0;}
1770   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1771   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1772   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1773   nztot = nzA + nzB;
1774 
1775   cmap = mat->garray;
1776   if (v  || idx) {
1777     if (nztot) {
1778       /* Sort by increasing column numbers, assuming A and B already sorted */
1779       PetscInt imark = -1;
1780       if (v) {
1781         *v = v_p = mat->rowvalues;
1782         for (i=0; i<nzB; i++) {
1783           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1784           else break;
1785         }
1786         imark = i;
1787         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1788         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1789       }
1790       if (idx) {
1791         *idx = idx_p = mat->rowindices;
1792         if (imark > -1) {
1793           for (i=0; i<imark; i++) {
1794             idx_p[i] = cmap[cworkB[i]];
1795           }
1796         } else {
1797           for (i=0; i<nzB; i++) {
1798             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1799             else break;
1800           }
1801           imark = i;
1802         }
1803         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1804         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1805       }
1806     } else {
1807       if (idx) *idx = 0;
1808       if (v)   *v   = 0;
1809     }
1810   }
1811   *nz  = nztot;
1812   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1813   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1818 {
1819   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1820 
1821   PetscFunctionBegin;
1822   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1823   aij->getrowactive = PETSC_FALSE;
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1828 {
1829   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1830   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1831   PetscErrorCode ierr;
1832   PetscInt       i,j,cstart = mat->cmap->rstart;
1833   PetscReal      sum = 0.0;
1834   MatScalar      *v;
1835 
1836   PetscFunctionBegin;
1837   if (aij->size == 1) {
1838     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1839   } else {
1840     if (type == NORM_FROBENIUS) {
1841       v = amat->a;
1842       for (i=0; i<amat->nz; i++) {
1843         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1844       }
1845       v = bmat->a;
1846       for (i=0; i<bmat->nz; i++) {
1847         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1848       }
1849       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1850       *norm = PetscSqrtReal(*norm);
1851       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1852     } else if (type == NORM_1) { /* max column norm */
1853       PetscReal *tmp,*tmp2;
1854       PetscInt  *jj,*garray = aij->garray;
1855       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1856       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1857       *norm = 0.0;
1858       v     = amat->a; jj = amat->j;
1859       for (j=0; j<amat->nz; j++) {
1860         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1861       }
1862       v = bmat->a; jj = bmat->j;
1863       for (j=0; j<bmat->nz; j++) {
1864         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1865       }
1866       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1867       for (j=0; j<mat->cmap->N; j++) {
1868         if (tmp2[j] > *norm) *norm = tmp2[j];
1869       }
1870       ierr = PetscFree(tmp);CHKERRQ(ierr);
1871       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1872       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1873     } else if (type == NORM_INFINITY) { /* max row norm */
1874       PetscReal ntemp = 0.0;
1875       for (j=0; j<aij->A->rmap->n; j++) {
1876         v   = amat->a + amat->i[j];
1877         sum = 0.0;
1878         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1879           sum += PetscAbsScalar(*v); v++;
1880         }
1881         v = bmat->a + bmat->i[j];
1882         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1883           sum += PetscAbsScalar(*v); v++;
1884         }
1885         if (sum > ntemp) ntemp = sum;
1886       }
1887       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1888       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1889     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1895 {
1896   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1897   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1898   PetscErrorCode ierr;
1899   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1900   PetscInt       cstart = A->cmap->rstart,ncol;
1901   Mat            B;
1902   MatScalar      *array;
1903 
1904   PetscFunctionBegin;
1905   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1906 
1907   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1908   ai = Aloc->i; aj = Aloc->j;
1909   bi = Bloc->i; bj = Bloc->j;
1910   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1911     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1912     PetscSFNode          *oloc;
1913     PETSC_UNUSED PetscSF sf;
1914 
1915     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1916     /* compute d_nnz for preallocation */
1917     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1918     for (i=0; i<ai[ma]; i++) {
1919       d_nnz[aj[i]]++;
1920       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1921     }
1922     /* compute local off-diagonal contributions */
1923     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1924     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1925     /* map those to global */
1926     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1927     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1928     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1929     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1930     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1931     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1932     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1933 
1934     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1935     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1936     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1937     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1938     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1939     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1940   } else {
1941     B    = *matout;
1942     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1943     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1944   }
1945 
1946   /* copy over the A part */
1947   array = Aloc->a;
1948   row   = A->rmap->rstart;
1949   for (i=0; i<ma; i++) {
1950     ncol = ai[i+1]-ai[i];
1951     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1952     row++;
1953     array += ncol; aj += ncol;
1954   }
1955   aj = Aloc->j;
1956   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1957 
1958   /* copy over the B part */
1959   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1960   array = Bloc->a;
1961   row   = A->rmap->rstart;
1962   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1963   cols_tmp = cols;
1964   for (i=0; i<mb; i++) {
1965     ncol = bi[i+1]-bi[i];
1966     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1967     row++;
1968     array += ncol; cols_tmp += ncol;
1969   }
1970   ierr = PetscFree(cols);CHKERRQ(ierr);
1971 
1972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1974   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1975     *matout = B;
1976   } else {
1977     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1978   }
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1983 {
1984   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1985   Mat            a    = aij->A,b = aij->B;
1986   PetscErrorCode ierr;
1987   PetscInt       s1,s2,s3;
1988 
1989   PetscFunctionBegin;
1990   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1991   if (rr) {
1992     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1993     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1994     /* Overlap communication with computation. */
1995     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1996   }
1997   if (ll) {
1998     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1999     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2000     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2001   }
2002   /* scale  the diagonal block */
2003   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2004 
2005   if (rr) {
2006     /* Do a scatter end and then right scale the off-diagonal block */
2007     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2008     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2014 {
2015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2024 {
2025   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2026   Mat            a,b,c,d;
2027   PetscBool      flg;
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   a = matA->A; b = matA->B;
2032   c = matB->A; d = matB->B;
2033 
2034   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2035   if (flg) {
2036     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2037   }
2038   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2043 {
2044   PetscErrorCode ierr;
2045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2046   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2047 
2048   PetscFunctionBegin;
2049   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2050   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2051     /* because of the column compression in the off-processor part of the matrix a->B,
2052        the number of columns in a->B and b->B may be different, hence we cannot call
2053        the MatCopy() directly on the two parts. If need be, we can provide a more
2054        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2055        then copying the submatrices */
2056     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2057   } else {
2058     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2059     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2060   }
2061   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2066 {
2067   PetscErrorCode ierr;
2068 
2069   PetscFunctionBegin;
2070   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 /*
2075    Computes the number of nonzeros per row needed for preallocation when X and Y
2076    have different nonzero structure.
2077 */
2078 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2079 {
2080   PetscInt       i,j,k,nzx,nzy;
2081 
2082   PetscFunctionBegin;
2083   /* Set the number of nonzeros in the new matrix */
2084   for (i=0; i<m; i++) {
2085     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2086     nzx = xi[i+1] - xi[i];
2087     nzy = yi[i+1] - yi[i];
2088     nnz[i] = 0;
2089     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2090       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2091       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2092       nnz[i]++;
2093     }
2094     for (; k<nzy; k++) nnz[i]++;
2095   }
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2100 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2101 {
2102   PetscErrorCode ierr;
2103   PetscInt       m = Y->rmap->N;
2104   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2105   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2106 
2107   PetscFunctionBegin;
2108   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2113 {
2114   PetscErrorCode ierr;
2115   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2116   PetscBLASInt   bnz,one=1;
2117   Mat_SeqAIJ     *x,*y;
2118 
2119   PetscFunctionBegin;
2120   if (str == SAME_NONZERO_PATTERN) {
2121     PetscScalar alpha = a;
2122     x    = (Mat_SeqAIJ*)xx->A->data;
2123     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2124     y    = (Mat_SeqAIJ*)yy->A->data;
2125     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2126     x    = (Mat_SeqAIJ*)xx->B->data;
2127     y    = (Mat_SeqAIJ*)yy->B->data;
2128     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2129     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2130     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2131   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2132     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2133   } else {
2134     Mat      B;
2135     PetscInt *nnz_d,*nnz_o;
2136     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2137     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2138     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2139     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2140     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2141     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2142     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2143     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2144     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2145     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2146     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2147     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2148     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2149     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2150   }
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2155 
2156 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2157 {
2158 #if defined(PETSC_USE_COMPLEX)
2159   PetscErrorCode ierr;
2160   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2161 
2162   PetscFunctionBegin;
2163   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2164   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2165 #else
2166   PetscFunctionBegin;
2167 #endif
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2172 {
2173   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2174   PetscErrorCode ierr;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2178   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2183 {
2184   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2189   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2194 {
2195   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2196   PetscErrorCode ierr;
2197   PetscInt       i,*idxb = 0;
2198   PetscScalar    *va,*vb;
2199   Vec            vtmp;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2203   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2204   if (idx) {
2205     for (i=0; i<A->rmap->n; i++) {
2206       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2207     }
2208   }
2209 
2210   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2211   if (idx) {
2212     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2213   }
2214   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2215   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2216 
2217   for (i=0; i<A->rmap->n; i++) {
2218     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2219       va[i] = vb[i];
2220       if (idx) idx[i] = a->garray[idxb[i]];
2221     }
2222   }
2223 
2224   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2225   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2226   ierr = PetscFree(idxb);CHKERRQ(ierr);
2227   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2232 {
2233   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2234   PetscErrorCode ierr;
2235   PetscInt       i,*idxb = 0;
2236   PetscScalar    *va,*vb;
2237   Vec            vtmp;
2238 
2239   PetscFunctionBegin;
2240   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2241   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2242   if (idx) {
2243     for (i=0; i<A->cmap->n; i++) {
2244       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2245     }
2246   }
2247 
2248   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2249   if (idx) {
2250     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2251   }
2252   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2253   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2254 
2255   for (i=0; i<A->rmap->n; i++) {
2256     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2257       va[i] = vb[i];
2258       if (idx) idx[i] = a->garray[idxb[i]];
2259     }
2260   }
2261 
2262   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2263   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2264   ierr = PetscFree(idxb);CHKERRQ(ierr);
2265   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2270 {
2271   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2272   PetscInt       n      = A->rmap->n;
2273   PetscInt       cstart = A->cmap->rstart;
2274   PetscInt       *cmap  = mat->garray;
2275   PetscInt       *diagIdx, *offdiagIdx;
2276   Vec            diagV, offdiagV;
2277   PetscScalar    *a, *diagA, *offdiagA;
2278   PetscInt       r;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2283   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2284   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2285   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2286   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2287   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2288   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2289   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2290   for (r = 0; r < n; ++r) {
2291     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2292       a[r]   = diagA[r];
2293       idx[r] = cstart + diagIdx[r];
2294     } else {
2295       a[r]   = offdiagA[r];
2296       idx[r] = cmap[offdiagIdx[r]];
2297     }
2298   }
2299   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2300   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2301   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2302   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2303   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2304   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2309 {
2310   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2311   PetscInt       n      = A->rmap->n;
2312   PetscInt       cstart = A->cmap->rstart;
2313   PetscInt       *cmap  = mat->garray;
2314   PetscInt       *diagIdx, *offdiagIdx;
2315   Vec            diagV, offdiagV;
2316   PetscScalar    *a, *diagA, *offdiagA;
2317   PetscInt       r;
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2322   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2323   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2324   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2325   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2326   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2327   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2328   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2329   for (r = 0; r < n; ++r) {
2330     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2331       a[r]   = diagA[r];
2332       idx[r] = cstart + diagIdx[r];
2333     } else {
2334       a[r]   = offdiagA[r];
2335       idx[r] = cmap[offdiagIdx[r]];
2336     }
2337   }
2338   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2339   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2340   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2341   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2342   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2343   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2348 {
2349   PetscErrorCode ierr;
2350   Mat            *dummy;
2351 
2352   PetscFunctionBegin;
2353   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2354   *newmat = *dummy;
2355   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2360 {
2361   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2366   A->factorerrortype = a->A->factorerrortype;
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2371 {
2372   PetscErrorCode ierr;
2373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2374 
2375   PetscFunctionBegin;
2376   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2377   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2378   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2379   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2384 {
2385   PetscFunctionBegin;
2386   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2387   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 /*@
2392    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2393 
2394    Collective on Mat
2395 
2396    Input Parameters:
2397 +    A - the matrix
2398 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2399 
2400  Level: advanced
2401 
2402 @*/
2403 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2404 {
2405   PetscErrorCode       ierr;
2406 
2407   PetscFunctionBegin;
2408   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2413 {
2414   PetscErrorCode       ierr;
2415   PetscBool            sc = PETSC_FALSE,flg;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2419   ierr = PetscObjectOptionsBegin((PetscObject)A);
2420     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2421     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2422     if (flg) {
2423       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2424     }
2425   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2430 {
2431   PetscErrorCode ierr;
2432   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2433   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2434 
2435   PetscFunctionBegin;
2436   if (!Y->preallocated) {
2437     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2438   } else if (!aij->nz) {
2439     PetscInt nonew = aij->nonew;
2440     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2441     aij->nonew = nonew;
2442   }
2443   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2448 {
2449   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2450   PetscErrorCode ierr;
2451 
2452   PetscFunctionBegin;
2453   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2454   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2455   if (d) {
2456     PetscInt rstart;
2457     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2458     *d += rstart;
2459 
2460   }
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 
2465 /* -------------------------------------------------------------------*/
2466 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2467                                        MatGetRow_MPIAIJ,
2468                                        MatRestoreRow_MPIAIJ,
2469                                        MatMult_MPIAIJ,
2470                                 /* 4*/ MatMultAdd_MPIAIJ,
2471                                        MatMultTranspose_MPIAIJ,
2472                                        MatMultTransposeAdd_MPIAIJ,
2473                                        0,
2474                                        0,
2475                                        0,
2476                                 /*10*/ 0,
2477                                        0,
2478                                        0,
2479                                        MatSOR_MPIAIJ,
2480                                        MatTranspose_MPIAIJ,
2481                                 /*15*/ MatGetInfo_MPIAIJ,
2482                                        MatEqual_MPIAIJ,
2483                                        MatGetDiagonal_MPIAIJ,
2484                                        MatDiagonalScale_MPIAIJ,
2485                                        MatNorm_MPIAIJ,
2486                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2487                                        MatAssemblyEnd_MPIAIJ,
2488                                        MatSetOption_MPIAIJ,
2489                                        MatZeroEntries_MPIAIJ,
2490                                 /*24*/ MatZeroRows_MPIAIJ,
2491                                        0,
2492                                        0,
2493                                        0,
2494                                        0,
2495                                 /*29*/ MatSetUp_MPIAIJ,
2496                                        0,
2497                                        0,
2498                                        MatGetDiagonalBlock_MPIAIJ,
2499                                        0,
2500                                 /*34*/ MatDuplicate_MPIAIJ,
2501                                        0,
2502                                        0,
2503                                        0,
2504                                        0,
2505                                 /*39*/ MatAXPY_MPIAIJ,
2506                                        MatCreateSubMatrices_MPIAIJ,
2507                                        MatIncreaseOverlap_MPIAIJ,
2508                                        MatGetValues_MPIAIJ,
2509                                        MatCopy_MPIAIJ,
2510                                 /*44*/ MatGetRowMax_MPIAIJ,
2511                                        MatScale_MPIAIJ,
2512                                        MatShift_MPIAIJ,
2513                                        MatDiagonalSet_MPIAIJ,
2514                                        MatZeroRowsColumns_MPIAIJ,
2515                                 /*49*/ MatSetRandom_MPIAIJ,
2516                                        0,
2517                                        0,
2518                                        0,
2519                                        0,
2520                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2521                                        0,
2522                                        MatSetUnfactored_MPIAIJ,
2523                                        MatPermute_MPIAIJ,
2524                                        0,
2525                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2526                                        MatDestroy_MPIAIJ,
2527                                        MatView_MPIAIJ,
2528                                        0,
2529                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2530                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2531                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2536                                        MatGetRowMinAbs_MPIAIJ,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                        0,
2541                                 /*75*/ MatFDColoringApply_AIJ,
2542                                        MatSetFromOptions_MPIAIJ,
2543                                        0,
2544                                        0,
2545                                        MatFindZeroDiagonals_MPIAIJ,
2546                                 /*80*/ 0,
2547                                        0,
2548                                        0,
2549                                 /*83*/ MatLoad_MPIAIJ,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                        0,
2554                                        0,
2555                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2556                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2557                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2558                                        MatPtAP_MPIAIJ_MPIAIJ,
2559                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2560                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                        0,
2565                                 /*99*/ 0,
2566                                        0,
2567                                        0,
2568                                        MatConjugate_MPIAIJ,
2569                                        0,
2570                                 /*104*/MatSetValuesRow_MPIAIJ,
2571                                        MatRealPart_MPIAIJ,
2572                                        MatImaginaryPart_MPIAIJ,
2573                                        0,
2574                                        0,
2575                                 /*109*/0,
2576                                        0,
2577                                        MatGetRowMin_MPIAIJ,
2578                                        0,
2579                                        MatMissingDiagonal_MPIAIJ,
2580                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2581                                        0,
2582                                        MatGetGhosts_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                 /*119*/0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                        MatGetMultiProcBlock_MPIAIJ,
2590                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2591                                        MatGetColumnNorms_MPIAIJ,
2592                                        MatInvertBlockDiagonal_MPIAIJ,
2593                                        0,
2594                                        MatCreateSubMatricesMPI_MPIAIJ,
2595                                 /*129*/0,
2596                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2597                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2598                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2599                                        0,
2600                                 /*134*/0,
2601                                        0,
2602                                        MatRARt_MPIAIJ_MPIAIJ,
2603                                        0,
2604                                        0,
2605                                 /*139*/MatSetBlockSizes_MPIAIJ,
2606                                        0,
2607                                        0,
2608                                        MatFDColoringSetUp_MPIXAIJ,
2609                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2610                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2611 };
2612 
2613 /* ----------------------------------------------------------------------------------------*/
2614 
2615 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2616 {
2617   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2618   PetscErrorCode ierr;
2619 
2620   PetscFunctionBegin;
2621   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2622   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2623   PetscFunctionReturn(0);
2624 }
2625 
2626 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2627 {
2628   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2629   PetscErrorCode ierr;
2630 
2631   PetscFunctionBegin;
2632   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2633   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2638 {
2639   Mat_MPIAIJ     *b;
2640   PetscErrorCode ierr;
2641 
2642   PetscFunctionBegin;
2643   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2644   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2645   b = (Mat_MPIAIJ*)B->data;
2646 
2647 #if defined(PETSC_USE_CTABLE)
2648   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2649 #else
2650   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2651 #endif
2652   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2653   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2654   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2655 
2656   /* Because the B will have been resized we simply destroy it and create a new one each time */
2657   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2658   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2659   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2660   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2661   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2662   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2663 
2664   if (!B->preallocated) {
2665     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2666     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2667     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2668     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2669     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2670   }
2671 
2672   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2673   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2674   B->preallocated  = PETSC_TRUE;
2675   B->was_assembled = PETSC_FALSE;
2676   B->assembled     = PETSC_FALSE;;
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2681 {
2682   Mat_MPIAIJ     *b;
2683   PetscErrorCode ierr;
2684 
2685   PetscFunctionBegin;
2686   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2687   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2688   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2689   b = (Mat_MPIAIJ*)B->data;
2690 
2691 #if defined(PETSC_USE_CTABLE)
2692   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2693 #else
2694   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2695 #endif
2696   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2697   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2698   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2699 
2700   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2701   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2702   B->preallocated  = PETSC_TRUE;
2703   B->was_assembled = PETSC_FALSE;
2704   B->assembled = PETSC_FALSE;
2705   PetscFunctionReturn(0);
2706 }
2707 
2708 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2709 {
2710   Mat            mat;
2711   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2712   PetscErrorCode ierr;
2713 
2714   PetscFunctionBegin;
2715   *newmat = 0;
2716   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2717   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2718   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2719   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2720   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2721   a       = (Mat_MPIAIJ*)mat->data;
2722 
2723   mat->factortype   = matin->factortype;
2724   mat->assembled    = PETSC_TRUE;
2725   mat->insertmode   = NOT_SET_VALUES;
2726   mat->preallocated = PETSC_TRUE;
2727 
2728   a->size         = oldmat->size;
2729   a->rank         = oldmat->rank;
2730   a->donotstash   = oldmat->donotstash;
2731   a->roworiented  = oldmat->roworiented;
2732   a->rowindices   = 0;
2733   a->rowvalues    = 0;
2734   a->getrowactive = PETSC_FALSE;
2735 
2736   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2737   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2738 
2739   if (oldmat->colmap) {
2740 #if defined(PETSC_USE_CTABLE)
2741     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2742 #else
2743     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2744     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2745     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2746 #endif
2747   } else a->colmap = 0;
2748   if (oldmat->garray) {
2749     PetscInt len;
2750     len  = oldmat->B->cmap->n;
2751     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2752     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2753     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2754   } else a->garray = 0;
2755 
2756   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2757   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2758   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2759   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2760   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2761   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2762   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2763   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2764   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2765   *newmat = mat;
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2770 {
2771   PetscScalar    *vals,*svals;
2772   MPI_Comm       comm;
2773   PetscErrorCode ierr;
2774   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2775   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2776   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2777   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2778   PetscInt       cend,cstart,n,*rowners;
2779   int            fd;
2780   PetscInt       bs = newMat->rmap->bs;
2781 
2782   PetscFunctionBegin;
2783   /* force binary viewer to load .info file if it has not yet done so */
2784   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2785   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2786   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2787   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2788   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2789   if (!rank) {
2790     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2791     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2792     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2793   }
2794 
2795   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2796   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2797   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2798   if (bs < 0) bs = 1;
2799 
2800   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2801   M    = header[1]; N = header[2];
2802 
2803   /* If global sizes are set, check if they are consistent with that given in the file */
2804   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2805   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2806 
2807   /* determine ownership of all (block) rows */
2808   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2809   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2810   else m = newMat->rmap->n; /* Set by user */
2811 
2812   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2813   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2814 
2815   /* First process needs enough room for process with most rows */
2816   if (!rank) {
2817     mmax = rowners[1];
2818     for (i=2; i<=size; i++) {
2819       mmax = PetscMax(mmax, rowners[i]);
2820     }
2821   } else mmax = -1;             /* unused, but compilers complain */
2822 
2823   rowners[0] = 0;
2824   for (i=2; i<=size; i++) {
2825     rowners[i] += rowners[i-1];
2826   }
2827   rstart = rowners[rank];
2828   rend   = rowners[rank+1];
2829 
2830   /* distribute row lengths to all processors */
2831   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2832   if (!rank) {
2833     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2834     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2835     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2836     for (j=0; j<m; j++) {
2837       procsnz[0] += ourlens[j];
2838     }
2839     for (i=1; i<size; i++) {
2840       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2841       /* calculate the number of nonzeros on each processor */
2842       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2843         procsnz[i] += rowlengths[j];
2844       }
2845       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2846     }
2847     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2848   } else {
2849     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2850   }
2851 
2852   if (!rank) {
2853     /* determine max buffer needed and allocate it */
2854     maxnz = 0;
2855     for (i=0; i<size; i++) {
2856       maxnz = PetscMax(maxnz,procsnz[i]);
2857     }
2858     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2859 
2860     /* read in my part of the matrix column indices  */
2861     nz   = procsnz[0];
2862     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2863     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2864 
2865     /* read in every one elses and ship off */
2866     for (i=1; i<size; i++) {
2867       nz   = procsnz[i];
2868       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2869       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2870     }
2871     ierr = PetscFree(cols);CHKERRQ(ierr);
2872   } else {
2873     /* determine buffer space needed for message */
2874     nz = 0;
2875     for (i=0; i<m; i++) {
2876       nz += ourlens[i];
2877     }
2878     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2879 
2880     /* receive message of column indices*/
2881     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2882   }
2883 
2884   /* determine column ownership if matrix is not square */
2885   if (N != M) {
2886     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2887     else n = newMat->cmap->n;
2888     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2889     cstart = cend - n;
2890   } else {
2891     cstart = rstart;
2892     cend   = rend;
2893     n      = cend - cstart;
2894   }
2895 
2896   /* loop over local rows, determining number of off diagonal entries */
2897   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2898   jj   = 0;
2899   for (i=0; i<m; i++) {
2900     for (j=0; j<ourlens[i]; j++) {
2901       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2902       jj++;
2903     }
2904   }
2905 
2906   for (i=0; i<m; i++) {
2907     ourlens[i] -= offlens[i];
2908   }
2909   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2910 
2911   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2912 
2913   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2914 
2915   for (i=0; i<m; i++) {
2916     ourlens[i] += offlens[i];
2917   }
2918 
2919   if (!rank) {
2920     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2921 
2922     /* read in my part of the matrix numerical values  */
2923     nz   = procsnz[0];
2924     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2925 
2926     /* insert into matrix */
2927     jj      = rstart;
2928     smycols = mycols;
2929     svals   = vals;
2930     for (i=0; i<m; i++) {
2931       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2932       smycols += ourlens[i];
2933       svals   += ourlens[i];
2934       jj++;
2935     }
2936 
2937     /* read in other processors and ship out */
2938     for (i=1; i<size; i++) {
2939       nz   = procsnz[i];
2940       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2941       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2942     }
2943     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2944   } else {
2945     /* receive numeric values */
2946     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2947 
2948     /* receive message of values*/
2949     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2950 
2951     /* insert into matrix */
2952     jj      = rstart;
2953     smycols = mycols;
2954     svals   = vals;
2955     for (i=0; i<m; i++) {
2956       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2957       smycols += ourlens[i];
2958       svals   += ourlens[i];
2959       jj++;
2960     }
2961   }
2962   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2963   ierr = PetscFree(vals);CHKERRQ(ierr);
2964   ierr = PetscFree(mycols);CHKERRQ(ierr);
2965   ierr = PetscFree(rowners);CHKERRQ(ierr);
2966   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2967   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 /* Not scalable because of ISAllGather() unless getting all columns. */
2972 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2973 {
2974   PetscErrorCode ierr;
2975   IS             iscol_local;
2976   PetscBool      isstride;
2977   PetscMPIInt    lisstride=0,gisstride;
2978 
2979   PetscFunctionBegin;
2980   /* check if we are grabbing all columns*/
2981   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2982 
2983   if (isstride) {
2984     PetscInt  start,len,mstart,mlen;
2985     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2986     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2987     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2988     if (mstart == start && mlen-mstart == len) lisstride = 1;
2989   }
2990 
2991   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2992   if (gisstride) {
2993     PetscInt N;
2994     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2995     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2996     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2997     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2998   } else {
2999     PetscInt cbs;
3000     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3001     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3002     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3003   }
3004 
3005   *isseq = iscol_local;
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 /*
3010  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3011  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3012 
3013  Input Parameters:
3014    mat - matrix
3015    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3016            i.e., mat->rstart <= isrow[i] < mat->rend
3017    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3018            i.e., mat->cstart <= iscol[i] < mat->cend
3019  Output Parameter:
3020    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3021    iscol_o - sequential column index set for retrieving mat->B
3022    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3023  */
3024 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3025 {
3026   PetscErrorCode ierr;
3027   Vec            x,cmap;
3028   const PetscInt *is_idx;
3029   PetscScalar    *xarray,*cmaparray;
3030   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3031   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3032   Mat            B=a->B;
3033   Vec            lvec=a->lvec,lcmap;
3034   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3035   MPI_Comm       comm;
3036 
3037   PetscFunctionBegin;
3038   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3039   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3040 
3041   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3042   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3043   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3044   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3045 
3046   /* Get start indices */
3047   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3048   isstart -= ncols;
3049   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3050 
3051   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3052   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3053   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3054   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3055   for (i=0; i<ncols; i++) {
3056     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3057     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3058     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3059   }
3060   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3061   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3062   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3063 
3064   /* Get iscol_d */
3065   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3066   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3067   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3068 
3069   /* Get isrow_d */
3070   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3071   rstart = mat->rmap->rstart;
3072   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3073   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3074   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3075   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3076 
3077   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3078   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3079   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3080 
3081   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3082   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3083 
3084   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3085 
3086   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3087   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3088   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3089 
3090   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3091   /* off-process column indices */
3092   count = 0;
3093   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3094   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3095 
3096   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3097   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3098   for (i=0; i<Bn; i++) {
3099     if (PetscRealPart(xarray[i]) > -1.0) {
3100       idx[count]     = i;                   /* local column index in off-diagonal part B */
3101       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3102     }
3103   }
3104   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3105   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3106 
3107   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3108   /* cannot ensure iscol_o has same blocksize as iscol! */
3109 
3110   ierr = PetscFree(idx);CHKERRQ(ierr);
3111 
3112   *garray = cmap1;
3113 
3114   ierr = VecDestroy(&x);CHKERRQ(ierr);
3115   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3116   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3121 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3122 {
3123   PetscErrorCode ierr;
3124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3125   Mat            M = NULL;
3126   MPI_Comm       comm;
3127   IS             iscol_d,isrow_d,iscol_o;
3128   Mat            Asub = NULL,Bsub = NULL;
3129   PetscInt       n;
3130 
3131   PetscFunctionBegin;
3132   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3133 
3134   if (call == MAT_REUSE_MATRIX) {
3135     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3136     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3137     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3138 
3139     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3140     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3141 
3142     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3143     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3144 
3145     /* Update diagonal and off-diagonal portions of submat */
3146     asub = (Mat_MPIAIJ*)(*submat)->data;
3147     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3148     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3149     if (n) {
3150       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3151     }
3152     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3154 
3155   } else { /* call == MAT_INITIAL_MATRIX) */
3156     const PetscInt *garray;
3157     PetscInt        BsubN;
3158 
3159     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3160     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3161 
3162     /* Create local submatrices Asub and Bsub */
3163     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3164     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3165 
3166     /* Create submatrix M */
3167     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3168 
3169     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3170     asub = (Mat_MPIAIJ*)M->data;
3171 
3172     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3173     n = asub->B->cmap->N;
3174     if (BsubN > n) {
3175       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3176       const PetscInt *idx;
3177       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3178       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3179 
3180       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3181       j = 0;
3182       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3183       for (i=0; i<n; i++) {
3184         if (j >= BsubN) break;
3185         while (subgarray[i] > garray[j]) j++;
3186 
3187         if (subgarray[i] == garray[j]) {
3188           idx_new[i] = idx[j++];
3189         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3190       }
3191       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3192 
3193       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3194       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3195 
3196     } else if (BsubN < n) {
3197       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3198     }
3199 
3200     ierr = PetscFree(garray);CHKERRQ(ierr);
3201     *submat = M;
3202 
3203     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3204     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3205     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3206 
3207     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3208     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3209 
3210     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3211     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3212   }
3213   PetscFunctionReturn(0);
3214 }
3215 
3216 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3217 {
3218   PetscErrorCode ierr;
3219   IS             iscol_local,isrow_d;
3220   PetscInt       csize;
3221   PetscInt       n,i,j,start,end;
3222   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3223   MPI_Comm       comm;
3224 
3225   PetscFunctionBegin;
3226   /* If isrow has same processor distribution as mat,
3227      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3228   if (call == MAT_REUSE_MATRIX) {
3229     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3230     if (isrow_d) {
3231       sameRowDist  = PETSC_TRUE;
3232       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3233     } else {
3234       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3235       if (iscol_local) {
3236         sameRowDist  = PETSC_TRUE;
3237         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3238       }
3239     }
3240   } else {
3241     /* Check if isrow has same processor distribution as mat */
3242     sameDist[0] = PETSC_FALSE;
3243     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3244     if (!n) {
3245       sameDist[0] = PETSC_TRUE;
3246     } else {
3247       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3248       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3249       if (i >= start && j < end) {
3250         sameDist[0] = PETSC_TRUE;
3251       }
3252     }
3253 
3254     /* Check if iscol has same processor distribution as mat */
3255     sameDist[1] = PETSC_FALSE;
3256     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3257     if (!n) {
3258       sameDist[1] = PETSC_TRUE;
3259     } else {
3260       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3261       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3262       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3263     }
3264 
3265     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3266     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3267     sameRowDist = tsameDist[0];
3268   }
3269 
3270   if (sameRowDist) {
3271     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3272       /* isrow and iscol have same processor distribution as mat */
3273       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3274     } else { /* sameRowDist */
3275       /* isrow has same processor distribution as mat */
3276       ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3277     }
3278     PetscFunctionReturn(0);
3279   }
3280 
3281   /* General case: iscol -> iscol_local which has global size of iscol */
3282   if (call == MAT_REUSE_MATRIX) {
3283     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3284     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3285   } else {
3286     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3287   }
3288 
3289   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3290   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3291 
3292   if (call == MAT_INITIAL_MATRIX) {
3293     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3294     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3295   }
3296   PetscFunctionReturn(0);
3297 }
3298 
3299 /*@C
3300      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3301          and "off-diagonal" part of the matrix in CSR format.
3302 
3303    Collective on MPI_Comm
3304 
3305    Input Parameters:
3306 +  comm - MPI communicator
3307 .  A - "diagonal" portion of matrix
3308 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3309 -  garray - global index of B columns
3310 
3311    Output Parameter:
3312 .   mat - the matrix, with input A as its local diagonal matrix
3313    Level: advanced
3314 
3315    Notes:
3316        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3317        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3318 
3319 .seealso: MatCreateMPIAIJWithSplitArrays()
3320 @*/
3321 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3322 {
3323   PetscErrorCode ierr;
3324   Mat_MPIAIJ     *maij;
3325   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3326   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3327   PetscScalar    *oa=b->a;
3328   Mat            Bnew;
3329   PetscInt       m,n,N;
3330 
3331   PetscFunctionBegin;
3332   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3333   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3334   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3335   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3336   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3337   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3338 
3339   /* Get global columns of mat */
3340   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3341 
3342   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3343   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3344   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3345   maij = (Mat_MPIAIJ*)(*mat)->data;
3346 
3347   (*mat)->preallocated = PETSC_TRUE;
3348 
3349   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3350   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3351 
3352   /* Set A as diagonal portion of *mat */
3353   maij->A = A;
3354 
3355   nz = oi[m];
3356   for (i=0; i<nz; i++) {
3357     col   = oj[i];
3358     oj[i] = garray[col];
3359   }
3360 
3361    /* Set Bnew as off-diagonal portion of *mat */
3362   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3363   bnew        = (Mat_SeqAIJ*)Bnew->data;
3364   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3365   maij->B     = Bnew;
3366 
3367   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3368 
3369   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3370   b->free_a       = PETSC_FALSE;
3371   b->free_ij      = PETSC_FALSE;
3372   ierr = MatDestroy(&B);CHKERRQ(ierr);
3373 
3374   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3375   bnew->free_a       = PETSC_TRUE;
3376   bnew->free_ij      = PETSC_TRUE;
3377 
3378   /* condense columns of maij->B */
3379   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3380   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3381   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3382   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3383   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3388 
3389 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3390 {
3391   PetscErrorCode ierr;
3392   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3393   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3394   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3395   Mat            M,Msub,B=a->B;
3396   MatScalar      *aa;
3397   Mat_SeqAIJ     *aij;
3398   PetscInt       *garray = a->garray,*colsub,Ncols;
3399   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3400   IS             iscol_sub,iscmap;
3401   const PetscInt *is_idx,*cmap;
3402   PetscBool      allcolumns=PETSC_FALSE;
3403   IS             iscol_local=NULL;
3404   MPI_Comm       comm;
3405 
3406   PetscFunctionBegin;
3407   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3408 
3409   if (call == MAT_REUSE_MATRIX) {
3410     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3411     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3412     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3413 
3414     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3415     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3416 
3417     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3418     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3419 
3420     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3421 
3422   } else { /* call == MAT_INITIAL_MATRIX) */
3423     PetscBool flg;
3424 
3425     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3426     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3427 
3428     /* (1) iscol -> nonscalable iscol_local */
3429     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3430     ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3431     if (n != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != Ncols %d",n,Ncols);
3432 
3433     /* Check for special case: each processor gets entire matrix columns */
3434     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3435     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3436     if (allcolumns) {
3437       iscol_sub = iscol_local;
3438       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3439       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3440 
3441     } else {
3442       /* (2) iscol_local -> iscol_sub and iscmap */
3443       PetscInt *idx,*cmap1,k;
3444 
3445       /* implementation below requires iscol_local be sorted, it can have duplicate indices */
3446       ierr = ISSorted(iscol_local,&flg);CHKERRQ(ierr);
3447       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsorted iscol_local is not implemented yet");
3448 
3449       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3450       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3451       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3452       count = 0;
3453       k     = 0;
3454       for (i=0; i<Ncols; i++) {
3455         j = is_idx[i];
3456         if (j >= cstart && j < cend) {
3457           /* diagonal part of mat */
3458           idx[count]     = j;
3459           cmap1[count++] = i; /* column index in submat */
3460         } else if (Bn) {
3461           /* off-diagonal part of mat */
3462           if (j == garray[k]) {
3463             idx[count]     = j;
3464             cmap1[count++] = i;  /* column index in submat */
3465           } else if (j > garray[k]) {
3466             while (j > garray[k] && k < Bn-1) k++;
3467             if (j == garray[k]) {
3468               idx[count]     = j;
3469               cmap1[count++] = i; /* column index in submat */
3470             }
3471           }
3472         }
3473       }
3474       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3475 
3476       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3477       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3478       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3479 
3480       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3481     }
3482 
3483     /* (3) Create sequential Msub */
3484     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3485   }
3486 
3487   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3488   aij  = (Mat_SeqAIJ*)(Msub)->data;
3489   ii   = aij->i;
3490   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3491 
3492   /*
3493       m - number of local rows
3494       Ncols - number of columns (same on all processors)
3495       rstart - first row in new global matrix generated
3496   */
3497   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3498 
3499   if (call == MAT_INITIAL_MATRIX) {
3500     /* (4) Create parallel newmat */
3501     PetscMPIInt    rank,size;
3502     PetscInt       csize;
3503 
3504     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3505     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3506 
3507     /*
3508         Determine the number of non-zeros in the diagonal and off-diagonal
3509         portions of the matrix in order to do correct preallocation
3510     */
3511 
3512     /* first get start and end of "diagonal" columns */
3513     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3514     if (csize == PETSC_DECIDE) {
3515       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3516       if (mglobal == Ncols) { /* square matrix */
3517         nlocal = m;
3518       } else {
3519         nlocal = Ncols/size + ((Ncols % size) > rank);
3520       }
3521     } else {
3522       nlocal = csize;
3523     }
3524     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3525     rstart = rend - nlocal;
3526     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3527 
3528     /* next, compute all the lengths */
3529     jj    = aij->j;
3530     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3531     olens = dlens + m;
3532     for (i=0; i<m; i++) {
3533       jend = ii[i+1] - ii[i];
3534       olen = 0;
3535       dlen = 0;
3536       for (j=0; j<jend; j++) {
3537         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3538         else dlen++;
3539         jj++;
3540       }
3541       olens[i] = olen;
3542       dlens[i] = dlen;
3543     }
3544 
3545     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3546     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3547 
3548     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3549     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3550     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3551     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3552     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3553     ierr = PetscFree(dlens);CHKERRQ(ierr);
3554 
3555   } else { /* call == MAT_REUSE_MATRIX */
3556     M    = *newmat;
3557     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3558     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3559     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3560     /*
3561          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3562        rather than the slower MatSetValues().
3563     */
3564     M->was_assembled = PETSC_TRUE;
3565     M->assembled     = PETSC_FALSE;
3566   }
3567 
3568   /* (5) Set values of Msub to *newmat */
3569   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3570   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3571 
3572   jj   = aij->j;
3573   aa   = aij->a;
3574   for (i=0; i<m; i++) {
3575     row = rstart + i;
3576     nz  = ii[i+1] - ii[i];
3577     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3578     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3579     jj += nz; aa += nz;
3580   }
3581   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3582 
3583   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3584   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3585 
3586   ierr = PetscFree(colsub);CHKERRQ(ierr);
3587 
3588   /* save Msub, iscol_sub and iscmap used in processor for next request */
3589   if (call ==  MAT_INITIAL_MATRIX) {
3590     *newmat = M;
3591     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3592     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3593 
3594     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3595     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3596 
3597     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3598     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3599 
3600     if (iscol_local) {
3601       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3602       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3603     }
3604   }
3605   PetscFunctionReturn(0);
3606 }
3607 
3608 /*
3609     Not great since it makes two copies of the submatrix, first an SeqAIJ
3610   in local and then by concatenating the local matrices the end result.
3611   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3612 
3613   Note: This requires a sequential iscol with all indices.
3614 */
3615 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3616 {
3617   PetscErrorCode ierr;
3618   PetscMPIInt    rank,size;
3619   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3620   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3621   Mat            M,Mreuse;
3622   MatScalar      *aa,*vwork;
3623   MPI_Comm       comm;
3624   Mat_SeqAIJ     *aij;
3625   PetscBool      colflag,allcolumns=PETSC_FALSE;
3626 
3627   PetscFunctionBegin;
3628   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3629   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3631 
3632   /* Check for special case: each processor gets entire matrix columns */
3633   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3634   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3635   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3636 
3637   if (call ==  MAT_REUSE_MATRIX) {
3638     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3639     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3640     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3641   } else {
3642     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3643   }
3644 
3645   /*
3646       m - number of local rows
3647       n - number of columns (same on all processors)
3648       rstart - first row in new global matrix generated
3649   */
3650   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3651   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3652   if (call == MAT_INITIAL_MATRIX) {
3653     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3654     ii  = aij->i;
3655     jj  = aij->j;
3656 
3657     /*
3658         Determine the number of non-zeros in the diagonal and off-diagonal
3659         portions of the matrix in order to do correct preallocation
3660     */
3661 
3662     /* first get start and end of "diagonal" columns */
3663     if (csize == PETSC_DECIDE) {
3664       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3665       if (mglobal == n) { /* square matrix */
3666         nlocal = m;
3667       } else {
3668         nlocal = n/size + ((n % size) > rank);
3669       }
3670     } else {
3671       nlocal = csize;
3672     }
3673     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3674     rstart = rend - nlocal;
3675     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3676 
3677     /* next, compute all the lengths */
3678     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3679     olens = dlens + m;
3680     for (i=0; i<m; i++) {
3681       jend = ii[i+1] - ii[i];
3682       olen = 0;
3683       dlen = 0;
3684       for (j=0; j<jend; j++) {
3685         if (*jj < rstart || *jj >= rend) olen++;
3686         else dlen++;
3687         jj++;
3688       }
3689       olens[i] = olen;
3690       dlens[i] = dlen;
3691     }
3692     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3693     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3694     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3695     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3696     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3697     ierr = PetscFree(dlens);CHKERRQ(ierr);
3698   } else {
3699     PetscInt ml,nl;
3700 
3701     M    = *newmat;
3702     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3703     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3704     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3705     /*
3706          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3707        rather than the slower MatSetValues().
3708     */
3709     M->was_assembled = PETSC_TRUE;
3710     M->assembled     = PETSC_FALSE;
3711   }
3712   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3713   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3714   ii   = aij->i;
3715   jj   = aij->j;
3716   aa   = aij->a;
3717   for (i=0; i<m; i++) {
3718     row   = rstart + i;
3719     nz    = ii[i+1] - ii[i];
3720     cwork = jj;     jj += nz;
3721     vwork = aa;     aa += nz;
3722     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3723   }
3724 
3725   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3726   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3727   *newmat = M;
3728 
3729   /* save submatrix used in processor for next request */
3730   if (call ==  MAT_INITIAL_MATRIX) {
3731     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3732     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3733   }
3734   PetscFunctionReturn(0);
3735 }
3736 
3737 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3738 {
3739   PetscInt       m,cstart, cend,j,nnz,i,d;
3740   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3741   const PetscInt *JJ;
3742   PetscScalar    *values;
3743   PetscErrorCode ierr;
3744   PetscBool      nooffprocentries;
3745 
3746   PetscFunctionBegin;
3747   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3748 
3749   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3750   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3751   m      = B->rmap->n;
3752   cstart = B->cmap->rstart;
3753   cend   = B->cmap->rend;
3754   rstart = B->rmap->rstart;
3755 
3756   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3757 
3758 #if defined(PETSC_USE_DEBUGGING)
3759   for (i=0; i<m; i++) {
3760     nnz = Ii[i+1]- Ii[i];
3761     JJ  = J + Ii[i];
3762     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3763     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3764     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3765   }
3766 #endif
3767 
3768   for (i=0; i<m; i++) {
3769     nnz     = Ii[i+1]- Ii[i];
3770     JJ      = J + Ii[i];
3771     nnz_max = PetscMax(nnz_max,nnz);
3772     d       = 0;
3773     for (j=0; j<nnz; j++) {
3774       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3775     }
3776     d_nnz[i] = d;
3777     o_nnz[i] = nnz - d;
3778   }
3779   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3780   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3781 
3782   if (v) values = (PetscScalar*)v;
3783   else {
3784     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3785   }
3786 
3787   for (i=0; i<m; i++) {
3788     ii   = i + rstart;
3789     nnz  = Ii[i+1]- Ii[i];
3790     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3791   }
3792   nooffprocentries    = B->nooffprocentries;
3793   B->nooffprocentries = PETSC_TRUE;
3794   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3795   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3796   B->nooffprocentries = nooffprocentries;
3797 
3798   if (!v) {
3799     ierr = PetscFree(values);CHKERRQ(ierr);
3800   }
3801   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 /*@
3806    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3807    (the default parallel PETSc format).
3808 
3809    Collective on MPI_Comm
3810 
3811    Input Parameters:
3812 +  B - the matrix
3813 .  i - the indices into j for the start of each local row (starts with zero)
3814 .  j - the column indices for each local row (starts with zero)
3815 -  v - optional values in the matrix
3816 
3817    Level: developer
3818 
3819    Notes:
3820        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3821      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3822      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3823 
3824        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3825 
3826        The format which is used for the sparse matrix input, is equivalent to a
3827     row-major ordering.. i.e for the following matrix, the input data expected is
3828     as shown
3829 
3830 $        1 0 0
3831 $        2 0 3     P0
3832 $       -------
3833 $        4 5 6     P1
3834 $
3835 $     Process0 [P0]: rows_owned=[0,1]
3836 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3837 $        j =  {0,0,2}  [size = 3]
3838 $        v =  {1,2,3}  [size = 3]
3839 $
3840 $     Process1 [P1]: rows_owned=[2]
3841 $        i =  {0,3}    [size = nrow+1  = 1+1]
3842 $        j =  {0,1,2}  [size = 3]
3843 $        v =  {4,5,6}  [size = 3]
3844 
3845 .keywords: matrix, aij, compressed row, sparse, parallel
3846 
3847 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3848           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3849 @*/
3850 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3851 {
3852   PetscErrorCode ierr;
3853 
3854   PetscFunctionBegin;
3855   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3856   PetscFunctionReturn(0);
3857 }
3858 
3859 /*@C
3860    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3861    (the default parallel PETSc format).  For good matrix assembly performance
3862    the user should preallocate the matrix storage by setting the parameters
3863    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3864    performance can be increased by more than a factor of 50.
3865 
3866    Collective on MPI_Comm
3867 
3868    Input Parameters:
3869 +  B - the matrix
3870 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3871            (same value is used for all local rows)
3872 .  d_nnz - array containing the number of nonzeros in the various rows of the
3873            DIAGONAL portion of the local submatrix (possibly different for each row)
3874            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3875            The size of this array is equal to the number of local rows, i.e 'm'.
3876            For matrices that will be factored, you must leave room for (and set)
3877            the diagonal entry even if it is zero.
3878 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3879            submatrix (same value is used for all local rows).
3880 -  o_nnz - array containing the number of nonzeros in the various rows of the
3881            OFF-DIAGONAL portion of the local submatrix (possibly different for
3882            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3883            structure. The size of this array is equal to the number
3884            of local rows, i.e 'm'.
3885 
3886    If the *_nnz parameter is given then the *_nz parameter is ignored
3887 
3888    The AIJ format (also called the Yale sparse matrix format or
3889    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3890    storage.  The stored row and column indices begin with zero.
3891    See Users-Manual: ch_mat for details.
3892 
3893    The parallel matrix is partitioned such that the first m0 rows belong to
3894    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3895    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3896 
3897    The DIAGONAL portion of the local submatrix of a processor can be defined
3898    as the submatrix which is obtained by extracting the part corresponding to
3899    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3900    first row that belongs to the processor, r2 is the last row belonging to
3901    this processor, and c1-c2 is the range of indices of the local part of a
3902    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3903    common case of a square matrix, the row and column ranges are the same and
3904    the DIAGONAL part is also square. The remaining portion of the local
3905    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3906 
3907    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3908 
3909    You can call MatGetInfo() to get information on how effective the preallocation was;
3910    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3911    You can also run with the option -info and look for messages with the string
3912    malloc in them to see if additional memory allocation was needed.
3913 
3914    Example usage:
3915 
3916    Consider the following 8x8 matrix with 34 non-zero values, that is
3917    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3918    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3919    as follows:
3920 
3921 .vb
3922             1  2  0  |  0  3  0  |  0  4
3923     Proc0   0  5  6  |  7  0  0  |  8  0
3924             9  0 10  | 11  0  0  | 12  0
3925     -------------------------------------
3926            13  0 14  | 15 16 17  |  0  0
3927     Proc1   0 18  0  | 19 20 21  |  0  0
3928             0  0  0  | 22 23  0  | 24  0
3929     -------------------------------------
3930     Proc2  25 26 27  |  0  0 28  | 29  0
3931            30  0  0  | 31 32 33  |  0 34
3932 .ve
3933 
3934    This can be represented as a collection of submatrices as:
3935 
3936 .vb
3937       A B C
3938       D E F
3939       G H I
3940 .ve
3941 
3942    Where the submatrices A,B,C are owned by proc0, D,E,F are
3943    owned by proc1, G,H,I are owned by proc2.
3944 
3945    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3946    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3947    The 'M','N' parameters are 8,8, and have the same values on all procs.
3948 
3949    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3950    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3951    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3952    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3953    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3954    matrix, and [DF] as another SeqAIJ matrix.
3955 
3956    When d_nz, o_nz parameters are specified, d_nz storage elements are
3957    allocated for every row of the local diagonal submatrix, and o_nz
3958    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3959    One way to choose d_nz and o_nz is to use the max nonzeros per local
3960    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3961    In this case, the values of d_nz,o_nz are:
3962 .vb
3963      proc0 : dnz = 2, o_nz = 2
3964      proc1 : dnz = 3, o_nz = 2
3965      proc2 : dnz = 1, o_nz = 4
3966 .ve
3967    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3968    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3969    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3970    34 values.
3971 
3972    When d_nnz, o_nnz parameters are specified, the storage is specified
3973    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3974    In the above case the values for d_nnz,o_nnz are:
3975 .vb
3976      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3977      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3978      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3979 .ve
3980    Here the space allocated is sum of all the above values i.e 34, and
3981    hence pre-allocation is perfect.
3982 
3983    Level: intermediate
3984 
3985 .keywords: matrix, aij, compressed row, sparse, parallel
3986 
3987 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3988           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3989 @*/
3990 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3991 {
3992   PetscErrorCode ierr;
3993 
3994   PetscFunctionBegin;
3995   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3996   PetscValidType(B,1);
3997   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3998   PetscFunctionReturn(0);
3999 }
4000 
4001 /*@
4002      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4003          CSR format the local rows.
4004 
4005    Collective on MPI_Comm
4006 
4007    Input Parameters:
4008 +  comm - MPI communicator
4009 .  m - number of local rows (Cannot be PETSC_DECIDE)
4010 .  n - This value should be the same as the local size used in creating the
4011        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4012        calculated if N is given) For square matrices n is almost always m.
4013 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4014 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4015 .   i - row indices
4016 .   j - column indices
4017 -   a - matrix values
4018 
4019    Output Parameter:
4020 .   mat - the matrix
4021 
4022    Level: intermediate
4023 
4024    Notes:
4025        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4026      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4027      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4028 
4029        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4030 
4031        The format which is used for the sparse matrix input, is equivalent to a
4032     row-major ordering.. i.e for the following matrix, the input data expected is
4033     as shown
4034 
4035 $        1 0 0
4036 $        2 0 3     P0
4037 $       -------
4038 $        4 5 6     P1
4039 $
4040 $     Process0 [P0]: rows_owned=[0,1]
4041 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4042 $        j =  {0,0,2}  [size = 3]
4043 $        v =  {1,2,3}  [size = 3]
4044 $
4045 $     Process1 [P1]: rows_owned=[2]
4046 $        i =  {0,3}    [size = nrow+1  = 1+1]
4047 $        j =  {0,1,2}  [size = 3]
4048 $        v =  {4,5,6}  [size = 3]
4049 
4050 .keywords: matrix, aij, compressed row, sparse, parallel
4051 
4052 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4053           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4054 @*/
4055 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4056 {
4057   PetscErrorCode ierr;
4058 
4059   PetscFunctionBegin;
4060   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4061   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4062   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4063   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4064   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4065   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4066   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4067   PetscFunctionReturn(0);
4068 }
4069 
4070 /*@C
4071    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4072    (the default parallel PETSc format).  For good matrix assembly performance
4073    the user should preallocate the matrix storage by setting the parameters
4074    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4075    performance can be increased by more than a factor of 50.
4076 
4077    Collective on MPI_Comm
4078 
4079    Input Parameters:
4080 +  comm - MPI communicator
4081 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4082            This value should be the same as the local size used in creating the
4083            y vector for the matrix-vector product y = Ax.
4084 .  n - This value should be the same as the local size used in creating the
4085        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4086        calculated if N is given) For square matrices n is almost always m.
4087 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4088 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4089 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4090            (same value is used for all local rows)
4091 .  d_nnz - array containing the number of nonzeros in the various rows of the
4092            DIAGONAL portion of the local submatrix (possibly different for each row)
4093            or NULL, if d_nz is used to specify the nonzero structure.
4094            The size of this array is equal to the number of local rows, i.e 'm'.
4095 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4096            submatrix (same value is used for all local rows).
4097 -  o_nnz - array containing the number of nonzeros in the various rows of the
4098            OFF-DIAGONAL portion of the local submatrix (possibly different for
4099            each row) or NULL, if o_nz is used to specify the nonzero
4100            structure. The size of this array is equal to the number
4101            of local rows, i.e 'm'.
4102 
4103    Output Parameter:
4104 .  A - the matrix
4105 
4106    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4107    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4108    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4109 
4110    Notes:
4111    If the *_nnz parameter is given then the *_nz parameter is ignored
4112 
4113    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4114    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4115    storage requirements for this matrix.
4116 
4117    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4118    processor then it must be used on all processors that share the object for
4119    that argument.
4120 
4121    The user MUST specify either the local or global matrix dimensions
4122    (possibly both).
4123 
4124    The parallel matrix is partitioned across processors such that the
4125    first m0 rows belong to process 0, the next m1 rows belong to
4126    process 1, the next m2 rows belong to process 2 etc.. where
4127    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4128    values corresponding to [m x N] submatrix.
4129 
4130    The columns are logically partitioned with the n0 columns belonging
4131    to 0th partition, the next n1 columns belonging to the next
4132    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4133 
4134    The DIAGONAL portion of the local submatrix on any given processor
4135    is the submatrix corresponding to the rows and columns m,n
4136    corresponding to the given processor. i.e diagonal matrix on
4137    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4138    etc. The remaining portion of the local submatrix [m x (N-n)]
4139    constitute the OFF-DIAGONAL portion. The example below better
4140    illustrates this concept.
4141 
4142    For a square global matrix we define each processor's diagonal portion
4143    to be its local rows and the corresponding columns (a square submatrix);
4144    each processor's off-diagonal portion encompasses the remainder of the
4145    local matrix (a rectangular submatrix).
4146 
4147    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4148 
4149    When calling this routine with a single process communicator, a matrix of
4150    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4151    type of communicator, use the construction mechanism
4152 .vb
4153      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4154 .ve
4155 
4156 $     MatCreate(...,&A);
4157 $     MatSetType(A,MATMPIAIJ);
4158 $     MatSetSizes(A, m,n,M,N);
4159 $     MatMPIAIJSetPreallocation(A,...);
4160 
4161    By default, this format uses inodes (identical nodes) when possible.
4162    We search for consecutive rows with the same nonzero structure, thereby
4163    reusing matrix information to achieve increased efficiency.
4164 
4165    Options Database Keys:
4166 +  -mat_no_inode  - Do not use inodes
4167 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4168 -  -mat_aij_oneindex - Internally use indexing starting at 1
4169         rather than 0.  Note that when calling MatSetValues(),
4170         the user still MUST index entries starting at 0!
4171 
4172 
4173    Example usage:
4174 
4175    Consider the following 8x8 matrix with 34 non-zero values, that is
4176    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4177    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4178    as follows
4179 
4180 .vb
4181             1  2  0  |  0  3  0  |  0  4
4182     Proc0   0  5  6  |  7  0  0  |  8  0
4183             9  0 10  | 11  0  0  | 12  0
4184     -------------------------------------
4185            13  0 14  | 15 16 17  |  0  0
4186     Proc1   0 18  0  | 19 20 21  |  0  0
4187             0  0  0  | 22 23  0  | 24  0
4188     -------------------------------------
4189     Proc2  25 26 27  |  0  0 28  | 29  0
4190            30  0  0  | 31 32 33  |  0 34
4191 .ve
4192 
4193    This can be represented as a collection of submatrices as
4194 
4195 .vb
4196       A B C
4197       D E F
4198       G H I
4199 .ve
4200 
4201    Where the submatrices A,B,C are owned by proc0, D,E,F are
4202    owned by proc1, G,H,I are owned by proc2.
4203 
4204    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4205    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4206    The 'M','N' parameters are 8,8, and have the same values on all procs.
4207 
4208    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4209    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4210    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4211    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4212    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4213    matrix, and [DF] as another SeqAIJ matrix.
4214 
4215    When d_nz, o_nz parameters are specified, d_nz storage elements are
4216    allocated for every row of the local diagonal submatrix, and o_nz
4217    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4218    One way to choose d_nz and o_nz is to use the max nonzeros per local
4219    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4220    In this case, the values of d_nz,o_nz are
4221 .vb
4222      proc0 : dnz = 2, o_nz = 2
4223      proc1 : dnz = 3, o_nz = 2
4224      proc2 : dnz = 1, o_nz = 4
4225 .ve
4226    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4227    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4228    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4229    34 values.
4230 
4231    When d_nnz, o_nnz parameters are specified, the storage is specified
4232    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4233    In the above case the values for d_nnz,o_nnz are
4234 .vb
4235      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4236      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4237      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4238 .ve
4239    Here the space allocated is sum of all the above values i.e 34, and
4240    hence pre-allocation is perfect.
4241 
4242    Level: intermediate
4243 
4244 .keywords: matrix, aij, compressed row, sparse, parallel
4245 
4246 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4247           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4248 @*/
4249 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4250 {
4251   PetscErrorCode ierr;
4252   PetscMPIInt    size;
4253 
4254   PetscFunctionBegin;
4255   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4256   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4257   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4258   if (size > 1) {
4259     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4260     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4261   } else {
4262     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4263     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4264   }
4265   PetscFunctionReturn(0);
4266 }
4267 
4268 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4269 {
4270   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4271   PetscBool      flg;
4272   PetscErrorCode ierr;
4273 
4274   PetscFunctionBegin;
4275   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4276   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4277   if (Ad)     *Ad     = a->A;
4278   if (Ao)     *Ao     = a->B;
4279   if (colmap) *colmap = a->garray;
4280   PetscFunctionReturn(0);
4281 }
4282 
4283 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4284 {
4285   PetscErrorCode ierr;
4286   PetscInt       m,N,i,rstart,nnz,Ii;
4287   PetscInt       *indx;
4288   PetscScalar    *values;
4289 
4290   PetscFunctionBegin;
4291   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4292   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4293     PetscInt       *dnz,*onz,sum,bs,cbs;
4294 
4295     if (n == PETSC_DECIDE) {
4296       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4297     }
4298     /* Check sum(n) = N */
4299     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4300     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4301 
4302     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4303     rstart -= m;
4304 
4305     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4306     for (i=0; i<m; i++) {
4307       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4308       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4309       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4310     }
4311 
4312     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4313     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4314     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4315     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4316     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4317     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4318     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4319     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4320   }
4321 
4322   /* numeric phase */
4323   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4324   for (i=0; i<m; i++) {
4325     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4326     Ii   = i + rstart;
4327     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4328     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4329   }
4330   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4331   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4332   PetscFunctionReturn(0);
4333 }
4334 
4335 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4336 {
4337   PetscErrorCode    ierr;
4338   PetscMPIInt       rank;
4339   PetscInt          m,N,i,rstart,nnz;
4340   size_t            len;
4341   const PetscInt    *indx;
4342   PetscViewer       out;
4343   char              *name;
4344   Mat               B;
4345   const PetscScalar *values;
4346 
4347   PetscFunctionBegin;
4348   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4349   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4350   /* Should this be the type of the diagonal block of A? */
4351   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4352   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4353   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4354   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4355   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4356   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4357   for (i=0; i<m; i++) {
4358     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4359     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4360     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4361   }
4362   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4363   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4364 
4365   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4366   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4367   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4368   sprintf(name,"%s.%d",outfile,rank);
4369   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4370   ierr = PetscFree(name);CHKERRQ(ierr);
4371   ierr = MatView(B,out);CHKERRQ(ierr);
4372   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4373   ierr = MatDestroy(&B);CHKERRQ(ierr);
4374   PetscFunctionReturn(0);
4375 }
4376 
4377 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4378 {
4379   PetscErrorCode      ierr;
4380   Mat_Merge_SeqsToMPI *merge;
4381   PetscContainer      container;
4382 
4383   PetscFunctionBegin;
4384   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4385   if (container) {
4386     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4387     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4388     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4389     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4390     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4391     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4392     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4393     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4394     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4395     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4396     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4397     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4398     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4399     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4400     ierr = PetscFree(merge);CHKERRQ(ierr);
4401     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4402   }
4403   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 #include <../src/mat/utils/freespace.h>
4408 #include <petscbt.h>
4409 
/*
   MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of summing per-process sequential
   matrices into mpimat.

   Input Parameters:
+  seqmat - this process's sequential matrix (accessed as Mat_SeqAIJ)
-  mpimat - the parallel matrix; it must carry the "MatMergeSeqsToMPI" container, from
            which the row pointers (bi), column indices (bj), received index buffers and
            message lengths are read

   For every remote process with a nonzero send length the values of the rows owned by
   that process are sent; for every locally owned row the local values and all received
   values are added into a work row, which is inserted into mpimat with MatSetValues()
   before mpimat is assembled.
*/
4410 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4411 {
4412   PetscErrorCode      ierr;
4413   MPI_Comm            comm;
4414   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4415   PetscMPIInt         size,rank,taga,*len_s;
4416   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4417   PetscInt            proc,m;
4418   PetscInt            **buf_ri,**buf_rj;
4419   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4420   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4421   MPI_Request         *s_waits,*r_waits;
4422   MPI_Status          *status;
4423   MatScalar           *aa=a->a;
4424   MatScalar           **abuf_r,*ba_i;
4425   Mat_Merge_SeqsToMPI *merge;
4426   PetscContainer      container;
4427 
4428   PetscFunctionBegin;
4429   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4430   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4431 
4432   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4433   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4434 
  /* fetch the merge data composed with mpimat */
4435   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4436   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4437 
4438   bi     = merge->bi;
4439   bj     = merge->bj;
4440   buf_ri = merge->buf_ri;
4441   buf_rj = merge->buf_rj;
4442 
4443   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4444   owners = merge->rowmap->range;
4445   len_s  = merge->len_s;
4446 
4447   /* send and recv matrix values */
4448   /*-----------------------------*/
4449   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4450   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4451 
4452   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4453   for (proc=0,k=0; proc<size; proc++) {
4454     if (!len_s[proc]) continue;
    /* the values of the rows owned by [proc] start at row owners[proc] of seqmat */
4455     i    = owners[proc];
4456     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4457     k++;
4458   }
4459 
4460   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4461   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4462   ierr = PetscFree(status);CHKERRQ(ierr);
4463 
4464   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4465   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4466 
4467   /* insert mat values of mpimat */
4468   /*----------------------------*/
  /* ba_i is the work row; it holds N (global column count of mpimat) entries */
4469   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4470   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4471 
4472   for (k=0; k<merge->nrecv; k++) {
4473     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4474     nrows       = *(buf_ri_k[k]);
4475     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4476     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4477   }
4478 
4479   /* set values of ba */
4480   m = merge->rowmap->n;
4481   for (i=0; i<m; i++) {
4482     arow = owners[rank] + i;
4483     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4484     bnzi = bi[i+1] - bi[i];
    /* NOTE(review): ba_i is declared MatScalar but is zeroed using sizeof(PetscScalar) - confirm the two types always have the same size */
4485     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4486 
4487     /* add local non-zero vals of this proc's seqmat into ba */
4488     anzi   = ai[arow+1] - ai[arow];
4489     aj     = a->j + ai[arow];
4490     aa     = a->a + ai[arow];
4491     nextaj = 0;
    /* j walks bj_i until all anzi entries are matched; there is no bound on j, so this presumably
       relies on every column of aj occurring in bj_i in the same order - TODO confirm */
4492     for (j=0; nextaj<anzi; j++) {
4493       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4494         ba_i[j] += aa[nextaj++];
4495       }
4496     }
4497 
4498     /* add received vals into ba */
4499     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4500       /* i-th row */
4501       if (i == *nextrow[k]) {
4502         anzi   = *(nextai[k]+1) - *nextai[k];
4503         aj     = buf_rj[k] + *(nextai[k]);
4504         aa     = abuf_r[k] + *(nextai[k]);
4505         nextaj = 0;
4506         for (j=0; nextaj<anzi; j++) {
4507           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4508             ba_i[j] += aa[nextaj++];
4509           }
4510         }
        /* advance to the next row recorded in the k-th message */
4511         nextrow[k]++; nextai[k]++;
4512       }
4513     }
4514     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4515   }
4516   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4517   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518 
4519   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4520   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4521   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4522   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4523   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4524   PetscFunctionReturn(0);
4525 }
4526 
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of summing per-process sequential
   matrices into a parallel MATMPIAIJ matrix.

   Input Parameters:
+  comm   - communicator for the parallel matrix
.  seqmat - this process's sequential matrix (accessed as Mat_SeqAIJ)
.  m      - local row count handed to the row layout
-  n      - local column count (PETSC_DECIDE is handled when the sizes are set)

   Output Parameter:
.  mpimat - the preallocated, not yet assembled parallel matrix

   The routine exchanges the column indices (j-structure) and row structure (i-structure)
   of the rows owned by other processes, merges local and received column indices of each
   locally owned row through a sorted linked list, preallocates the matrix, and attaches
   the merge data to it in a container named "MatMergeSeqsToMPI".
*/
4527 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4528 {
4529   PetscErrorCode      ierr;
4530   Mat                 B_mpi;
4531   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4532   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4533   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4534   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4535   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4536   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4537   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4538   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4539   MPI_Status          *status;
4540   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4541   PetscBT             lnkbt;
4542   Mat_Merge_SeqsToMPI *merge;
4543   PetscContainer      container;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4547 
4548   /* make sure it is a PETSc comm */
4549   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4550   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4551   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4552 
4553   ierr = PetscNew(&merge);CHKERRQ(ierr);
4554   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4555 
4556   /* determine row ownership */
4557   /*---------------------------------------------------------*/
4558   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4559   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4560   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4561   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4562   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4563   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4564   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4565 
  /* from here on m is the local row count stored in the layout */
4566   m      = merge->rowmap->n;
4567   owners = merge->rowmap->range;
4568 
4569   /* determine the number of messages to send, their lengths */
4570   /*---------------------------------------------------------*/
4571   len_s = merge->len_s;
4572 
4573   len          = 0; /* length of buf_si[] */
4574   merge->nsend = 0;
4575   for (proc=0; proc<size; proc++) {
4576     len_si[proc] = 0;
4577     if (proc == rank) {
4578       len_s[proc] = 0;
4579     } else {
4580       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4581       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros (column indices) in the rows to be sent to [proc] */
4582     }
4583     if (len_s[proc]) {
4584       merge->nsend++;
      /* only rows with at least one nonzero are described in the i-structure message */
4585       nrows = 0;
4586       for (i=owners[proc]; i<owners[proc+1]; i++) {
4587         if (ai[i+1] > ai[i]) nrows++;
4588       }
4589       len_si[proc] = 2*(nrows+1);
4590       len         += len_si[proc];
4591     }
4592   }
4593 
4594   /* determine the number and length of messages to receive for ij-structure */
4595   /*-------------------------------------------------------------------------*/
4596   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4597   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4598 
4599   /* post the Irecv of j-structure */
4600   /*-------------------------------*/
4601   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4602   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4603 
4604   /* post the Isend of j-structure */
4605   /*--------------------------------*/
4606   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4607 
4608   for (proc=0, k=0; proc<size; proc++) {
4609     if (!len_s[proc]) continue;
4610     i    = owners[proc];
4611     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4612     k++;
4613   }
4614 
4615   /* receives and sends of j-structure are complete */
4616   /*------------------------------------------------*/
4617   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4618   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4619 
4620   /* send and recv i-structure */
4621   /*---------------------------*/
4622   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4623   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4624 
4625   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4626   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4627   for (proc=0,k=0; proc<size; proc++) {
4628     if (!len_s[proc]) continue;
4629     /* form outgoing message for i-structure:
4630          buf_si[0]:                 nrows to be sent
4631                [1:nrows]:           row index (local to the owning process [proc])
4632                [nrows+1:2*nrows+1]: i-structure index
4633     */
4634     /*-------------------------------------------*/
4635     nrows       = len_si[proc]/2 - 1;
4636     buf_si_i    = buf_si + nrows+1;
4637     buf_si[0]   = nrows;
4638     buf_si_i[0] = 0;
4639     nrows       = 0;
4640     for (i=owners[proc]; i<owners[proc+1]; i++) {
4641       anzi = ai[i+1] - ai[i];
4642       if (anzi) {
4643         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4644         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4645         nrows++;
4646       }
4647     }
4648     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4649     k++;
4650     buf_si += len_si[proc];
4651   }
4652 
4653   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4654   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4655 
  /* NOTE(review): len_ri is declared PetscMPIInt yet is printed with %D, the format this file uses
     for PetscInt arguments; confirm the types of the values passed below match the format */
4656   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4657   for (i=0; i<merge->nrecv; i++) {
4658     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4659   }
4660 
4661   ierr = PetscFree(len_si);CHKERRQ(ierr);
4662   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4663   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4664   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4665   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4666   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4667   ierr = PetscFree(status);CHKERRQ(ierr);
4668 
4669   /* compute a local seq matrix in each processor */
4670   /*----------------------------------------------*/
4671   /* allocate bi array and free space for accumulating nonzero column info */
4672   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4673   bi[0] = 0;
4674 
4675   /* create and initialize a linked list */
4676   nlnk = N+1;
4677   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4678 
4679   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4680   len  = ai[owners[rank+1]] - ai[owners[rank]];
4681   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4682 
4683   current_space = free_space;
4684 
4685   /* determine symbolic info for each local row */
4686   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4687 
4688   for (k=0; k<merge->nrecv; k++) {
4689     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4690     nrows       = *buf_ri_k[k];
4691     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4692     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4693   }
4694 
  /* MatPreallocateInitialize() is paired with MatPreallocateFinalize() further below */
4695   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4696   len  = 0;
4697   for (i=0; i<m; i++) {
4698     bnzi = 0;
4699     /* add local non-zero cols of this proc's seqmat into lnk */
4700     arow  = owners[rank] + i;
4701     anzi  = ai[arow+1] - ai[arow];
4702     aj    = a->j + ai[arow];
4703     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4704     bnzi += nlnk;
4705     /* add received col data into lnk */
4706     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4707       if (i == *nextrow[k]) { /* i-th row */
4708         anzi  = *(nextai[k]+1) - *nextai[k];
4709         aj    = buf_rj[k] + *nextai[k];
4710         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4711         bnzi += nlnk;
4712         nextrow[k]++; nextai[k]++;
4713       }
4714     }
4715     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4716 
4717     /* if free space is not available, make more free space */
4718     if (current_space->local_remaining<bnzi) {
4719       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4720       nspacedouble++;
4721     }
4722     /* copy data into free space, then initialize lnk */
4723     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4724     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4725 
4726     current_space->array           += bnzi;
4727     current_space->local_used      += bnzi;
4728     current_space->local_remaining -= bnzi;
4729 
4730     bi[i+1] = bi[i] + bnzi;
4731   }
4732 
4733   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4734 
  /* gather the column indices accumulated in the free space list into the single array bj */
4735   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4736   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4737   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4738 
4739   /* create symbolic parallel matrix B_mpi */
4740   /*---------------------------------------*/
4741   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4742   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4743   if (n==PETSC_DECIDE) {
4744     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4745   } else {
4746     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4747   }
4748   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4749   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4750   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4751   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4752   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4753 
4754   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4755   B_mpi->assembled    = PETSC_FALSE;
4756   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4757   merge->bi           = bi;
4758   merge->bj           = bj;
4759   merge->buf_ri       = buf_ri;
4760   merge->buf_rj       = buf_rj;
4761   merge->coi          = NULL;
4762   merge->coj          = NULL;
4763   merge->owners_co    = NULL;
4764 
  /* release the communicator obtained from PetscCommDuplicate() at the top */
4765   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4766 
4767   /* attach the supporting struct to B_mpi for reuse */
4768   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4769   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4770   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4771   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4772   *mpimat = B_mpi;
4773 
4774   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4775   PetscFunctionReturn(0);
4776 }
4777 
4778 /*@C
4779       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4780                  matrices from each processor
4781 
4782     Collective on MPI_Comm
4783 
4784    Input Parameters:
4785 +    comm - the communicators the parallel matrix will live on
4786 .    seqmat - the input sequential matrices
4787 .    m - number of local rows (or PETSC_DECIDE)
4788 .    n - number of local columns (or PETSC_DECIDE)
4789 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4790 
4791    Output Parameter:
4792 .    mpimat - the parallel matrix generated
4793 
4794     Level: advanced
4795 
4796    Notes:
4797      The dimensions of the sequential matrix in each processor MUST be the same.
4798      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4799      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4800 @*/
4801 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4802 {
4803   PetscErrorCode ierr;
4804   PetscMPIInt    size;
4805 
4806   PetscFunctionBegin;
4807   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4808   if (size == 1) {
4809     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4810     if (scall == MAT_INITIAL_MATRIX) {
4811       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4812     } else {
4813       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4814     }
4815     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4816     PetscFunctionReturn(0);
4817   }
4818   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4819   if (scall == MAT_INITIAL_MATRIX) {
4820     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4821   }
4822   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4823   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4824   PetscFunctionReturn(0);
4825 }
4826 
4827 /*@
4828      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4829           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4830           with MatGetSize()
4831 
4832     Not Collective
4833 
4834    Input Parameters:
4835 +    A - the matrix
4836 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4837 
4838    Output Parameter:
4839 .    A_loc - the local sequential matrix generated
4840 
4841     Level: developer
4842 
4843 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4844 
4845 @*/
4846 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4847 {
4848   PetscErrorCode ierr;
4849   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4850   Mat_SeqAIJ     *mat,*a,*b;
4851   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4852   MatScalar      *aa,*ba,*cam;
4853   PetscScalar    *ca;
4854   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4855   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4856   PetscBool      match;
4857   MPI_Comm       comm;
4858   PetscMPIInt    size;
4859 
4860   PetscFunctionBegin;
4861   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4862   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4863   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4864   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4865   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4866 
4867   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4868   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4869   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4870   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4871   aa = a->a; ba = b->a;
4872   if (scall == MAT_INITIAL_MATRIX) {
4873     if (size == 1) {
4874       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4875       PetscFunctionReturn(0);
4876     }
4877 
4878     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4879     ci[0] = 0;
4880     for (i=0; i<am; i++) {
4881       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4882     }
4883     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4884     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4885     k    = 0;
4886     for (i=0; i<am; i++) {
4887       ncols_o = bi[i+1] - bi[i];
4888       ncols_d = ai[i+1] - ai[i];
4889       /* off-diagonal portion of A */
4890       for (jo=0; jo<ncols_o; jo++) {
4891         col = cmap[*bj];
4892         if (col >= cstart) break;
4893         cj[k]   = col; bj++;
4894         ca[k++] = *ba++;
4895       }
4896       /* diagonal portion of A */
4897       for (j=0; j<ncols_d; j++) {
4898         cj[k]   = cstart + *aj++;
4899         ca[k++] = *aa++;
4900       }
4901       /* off-diagonal portion of A */
4902       for (j=jo; j<ncols_o; j++) {
4903         cj[k]   = cmap[*bj++];
4904         ca[k++] = *ba++;
4905       }
4906     }
4907     /* put together the new matrix */
4908     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4909     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4910     /* Since these are PETSc arrays, change flags to free them as necessary. */
4911     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4912     mat->free_a  = PETSC_TRUE;
4913     mat->free_ij = PETSC_TRUE;
4914     mat->nonew   = 0;
4915   } else if (scall == MAT_REUSE_MATRIX) {
4916     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4917     ci = mat->i; cj = mat->j; cam = mat->a;
4918     for (i=0; i<am; i++) {
4919       /* off-diagonal portion of A */
4920       ncols_o = bi[i+1] - bi[i];
4921       for (jo=0; jo<ncols_o; jo++) {
4922         col = cmap[*bj];
4923         if (col >= cstart) break;
4924         *cam++ = *ba++; bj++;
4925       }
4926       /* diagonal portion of A */
4927       ncols_d = ai[i+1] - ai[i];
4928       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4929       /* off-diagonal portion of A */
4930       for (j=jo; j<ncols_o; j++) {
4931         *cam++ = *ba++; bj++;
4932       }
4933     }
4934   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4935   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4936   PetscFunctionReturn(0);
4937 }
4938 
4939 /*@C
4940      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4941 
4942     Not Collective
4943 
4944    Input Parameters:
4945 +    A - the matrix
4946 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4947 -    row, col - index sets of rows and columns to extract (or NULL)
4948 
4949    Output Parameter:
4950 .    A_loc - the local sequential matrix generated
4951 
4952     Level: developer
4953 
4954 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4955 
4956 @*/
4957 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4958 {
4959   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4960   PetscErrorCode ierr;
4961   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4962   IS             isrowa,iscola;
4963   Mat            *aloc;
4964   PetscBool      match;
4965 
4966   PetscFunctionBegin;
4967   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4968   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4969   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4970   if (!row) {
4971     start = A->rmap->rstart; end = A->rmap->rend;
4972     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4973   } else {
4974     isrowa = *row;
4975   }
4976   if (!col) {
4977     start = A->cmap->rstart;
4978     cmap  = a->garray;
4979     nzA   = a->A->cmap->n;
4980     nzB   = a->B->cmap->n;
4981     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4982     ncols = 0;
4983     for (i=0; i<nzB; i++) {
4984       if (cmap[i] < start) idx[ncols++] = cmap[i];
4985       else break;
4986     }
4987     imark = i;
4988     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4989     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4990     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4991   } else {
4992     iscola = *col;
4993   }
4994   if (scall != MAT_INITIAL_MATRIX) {
4995     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4996     aloc[0] = *A_loc;
4997   }
4998   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4999   *A_loc = aloc[0];
5000   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5001   if (!row) {
5002     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5003   }
5004   if (!col) {
5005     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5006   }
5007   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5008   PetscFunctionReturn(0);
5009 }
5010 
5011 /*@C
5012     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5013 
5014     Collective on Mat
5015 
5016    Input Parameters:
5017 +    A,B - the matrices in mpiaij format
5018 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5019 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5020 
5021    Output Parameter:
5022 +    rowb, colb - index sets of rows and columns of B to extract
5023 -    B_seq - the sequential matrix generated
5024 
5025     Level: developer
5026 
5027 @*/
5028 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5029 {
5030   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5031   PetscErrorCode ierr;
5032   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5033   IS             isrowb,iscolb;
5034   Mat            *bseq=NULL;
5035 
5036   PetscFunctionBegin;
5037   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5038     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5039   }
5040   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5041 
5042   if (scall == MAT_INITIAL_MATRIX) {
5043     start = A->cmap->rstart;
5044     cmap  = a->garray;
5045     nzA   = a->A->cmap->n;
5046     nzB   = a->B->cmap->n;
5047     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5048     ncols = 0;
5049     for (i=0; i<nzB; i++) {  /* row < local row index */
5050       if (cmap[i] < start) idx[ncols++] = cmap[i];
5051       else break;
5052     }
5053     imark = i;
5054     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5055     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5056     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5057     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5058   } else {
5059     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5060     isrowb  = *rowb; iscolb = *colb;
5061     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5062     bseq[0] = *B_seq;
5063   }
5064   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5065   *B_seq = bseq[0];
5066   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5067   if (!rowb) {
5068     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5069   } else {
5070     *rowb = isrowb;
5071   }
5072   if (!colb) {
5073     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5074   } else {
5075     *colb = iscolb;
5076   }
5077   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5078   PetscFunctionReturn(0);
5079 }
5080 
5081 /*
5082     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5083     of the OFF-DIAGONAL portion of local A
5084 
5085     Collective on Mat
5086 
5087    Input Parameters:
5088 +    A,B - the matrices in mpiaij format
5089 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5090 
5091    Output Parameter:
5092 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5093 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5094 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5095 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5096 
5097     Level: developer
5098 
5099 */
5100 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5101 {
5102   VecScatter_MPI_General *gen_to,*gen_from;
5103   PetscErrorCode         ierr;
5104   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5105   Mat_SeqAIJ             *b_oth;
5106   VecScatter             ctx =a->Mvctx;
5107   MPI_Comm               comm;
5108   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5109   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5110   PetscInt               *rvalues,*svalues;
5111   MatScalar              *b_otha,*bufa,*bufA;
5112   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5113   MPI_Request            *rwaits = NULL,*swaits = NULL;
5114   MPI_Status             *sstatus,rstatus;
5115   PetscMPIInt            jj,size;
5116   PetscInt               *cols,sbs,rbs;
5117   PetscScalar            *vals;
5118 
5119   PetscFunctionBegin;
5120   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5121   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5122 
5123   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5124     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5125   }
5126   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5127   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5128 
5129   if (size == 1) {
5130     startsj_s = NULL;
5131     bufa_ptr  = NULL;
5132     *B_oth    = NULL;
5133     PetscFunctionReturn(0);
5134   }
5135 
5136   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5137   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5138   nrecvs   = gen_from->n;
5139   nsends   = gen_to->n;
5140 
5141   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5142   srow    = gen_to->indices;    /* local row index to be sent */
5143   sstarts = gen_to->starts;
5144   sprocs  = gen_to->procs;
5145   sstatus = gen_to->sstatus;
5146   sbs     = gen_to->bs;
5147   rstarts = gen_from->starts;
5148   rprocs  = gen_from->procs;
5149   rbs     = gen_from->bs;
5150 
5151   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5152   if (scall == MAT_INITIAL_MATRIX) {
5153     /* i-array */
5154     /*---------*/
5155     /*  post receives */
5156     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5157     for (i=0; i<nrecvs; i++) {
5158       rowlen = rvalues + rstarts[i]*rbs;
5159       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5160       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5161     }
5162 
5163     /* pack the outgoing message */
5164     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5165 
5166     sstartsj[0] = 0;
5167     rstartsj[0] = 0;
5168     len         = 0; /* total length of j or a array to be sent */
5169     k           = 0;
5170     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5171     for (i=0; i<nsends; i++) {
5172       rowlen = svalues + sstarts[i]*sbs;
5173       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5174       for (j=0; j<nrows; j++) {
5175         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5176         for (l=0; l<sbs; l++) {
5177           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5178 
5179           rowlen[j*sbs+l] = ncols;
5180 
5181           len += ncols;
5182           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5183         }
5184         k++;
5185       }
5186       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5187 
5188       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5189     }
5190     /* recvs and sends of i-array are completed */
5191     i = nrecvs;
5192     while (i--) {
5193       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5194     }
5195     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5196     ierr = PetscFree(svalues);CHKERRQ(ierr);
5197 
5198     /* allocate buffers for sending j and a arrays */
5199     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5200     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5201 
5202     /* create i-array of B_oth */
5203     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5204 
5205     b_othi[0] = 0;
5206     len       = 0; /* total length of j or a array to be received */
5207     k         = 0;
5208     for (i=0; i<nrecvs; i++) {
5209       rowlen = rvalues + rstarts[i]*rbs;
5210       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5211       for (j=0; j<nrows; j++) {
5212         b_othi[k+1] = b_othi[k] + rowlen[j];
5213         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5214         k++;
5215       }
5216       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5217     }
5218     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5219 
5220     /* allocate space for j and a arrrays of B_oth */
5221     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5222     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5223 
5224     /* j-array */
5225     /*---------*/
5226     /*  post receives of j-array */
5227     for (i=0; i<nrecvs; i++) {
5228       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5229       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5230     }
5231 
5232     /* pack the outgoing message j-array */
5233     k = 0;
5234     for (i=0; i<nsends; i++) {
5235       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5236       bufJ  = bufj+sstartsj[i];
5237       for (j=0; j<nrows; j++) {
5238         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5239         for (ll=0; ll<sbs; ll++) {
5240           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5241           for (l=0; l<ncols; l++) {
5242             *bufJ++ = cols[l];
5243           }
5244           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5245         }
5246       }
5247       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5248     }
5249 
5250     /* recvs and sends of j-array are completed */
5251     i = nrecvs;
5252     while (i--) {
5253       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5254     }
5255     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5256   } else if (scall == MAT_REUSE_MATRIX) {
5257     sstartsj = *startsj_s;
5258     rstartsj = *startsj_r;
5259     bufa     = *bufa_ptr;
5260     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5261     b_otha   = b_oth->a;
5262   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5263 
5264   /* a-array */
5265   /*---------*/
5266   /*  post receives of a-array */
5267   for (i=0; i<nrecvs; i++) {
5268     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5269     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5270   }
5271 
5272   /* pack the outgoing message a-array */
5273   k = 0;
5274   for (i=0; i<nsends; i++) {
5275     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5276     bufA  = bufa+sstartsj[i];
5277     for (j=0; j<nrows; j++) {
5278       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5279       for (ll=0; ll<sbs; ll++) {
5280         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5281         for (l=0; l<ncols; l++) {
5282           *bufA++ = vals[l];
5283         }
5284         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5285       }
5286     }
5287     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5288   }
5289   /* recvs and sends of a-array are completed */
5290   i = nrecvs;
5291   while (i--) {
5292     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5293   }
5294   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5295   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5296 
5297   if (scall == MAT_INITIAL_MATRIX) {
5298     /* put together the new matrix */
5299     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5300 
5301     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5302     /* Since these are PETSc arrays, change flags to free them as necessary. */
5303     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5304     b_oth->free_a  = PETSC_TRUE;
5305     b_oth->free_ij = PETSC_TRUE;
5306     b_oth->nonew   = 0;
5307 
5308     ierr = PetscFree(bufj);CHKERRQ(ierr);
5309     if (!startsj_s || !bufa_ptr) {
5310       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5311       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5312     } else {
5313       *startsj_s = sstartsj;
5314       *startsj_r = rstartsj;
5315       *bufa_ptr  = bufa;
5316     }
5317   }
5318   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5319   PetscFunctionReturn(0);
5320 }
5321 
5322 /*@C
5323   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5324 
5325   Not Collective
5326 
5327   Input Parameters:
5328 . A - The matrix in mpiaij format
5329 
5330   Output Parameter:
5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5332 . colmap - A map from global column index to local index into lvec
5333 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5334 
5335   Level: developer
5336 
5337 @*/
5338 #if defined(PETSC_USE_CTABLE)
5339 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5340 #else
5341 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5342 #endif
5343 {
5344   Mat_MPIAIJ *a;
5345 
5346   PetscFunctionBegin;
5347   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5348   PetscValidPointer(lvec, 2);
5349   PetscValidPointer(colmap, 3);
5350   PetscValidPointer(multScatter, 4);
5351   a = (Mat_MPIAIJ*) A->data;
5352   if (lvec) *lvec = a->lvec;
5353   if (colmap) *colmap = a->colmap;
5354   if (multScatter) *multScatter = a->Mvctx;
5355   PetscFunctionReturn(0);
5356 }
5357 
5358 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5359 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5360 #if defined(PETSC_HAVE_MKL)
5361 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5362 #endif
5363 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5364 #if defined(PETSC_HAVE_ELEMENTAL)
5365 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5366 #endif
5367 #if defined(PETSC_HAVE_HYPRE)
5368 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5369 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5370 #endif
5371 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5372 
5373 /*
5374     Computes (B'*A')' since computing B*A directly is untenable
5375 
5376                n                       p                          p
5377         (              )       (              )         (                  )
5378       m (      A       )  *  n (       B      )   =   m (         C        )
5379         (              )       (              )         (                  )
5380 
5381 */
5382 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5383 {
5384   PetscErrorCode ierr;
5385   Mat            At,Bt,Ct;
5386 
5387   PetscFunctionBegin;
5388   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5389   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5390   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5391   ierr = MatDestroy(&At);CHKERRQ(ierr);
5392   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5393   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5394   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5395   PetscFunctionReturn(0);
5396 }
5397 
5398 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5399 {
5400   PetscErrorCode ierr;
5401   PetscInt       m=A->rmap->n,n=B->cmap->n;
5402   Mat            Cmat;
5403 
5404   PetscFunctionBegin;
5405   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5406   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5407   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5408   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5409   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5410   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5411   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5412   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5413 
5414   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5415 
5416   *C = Cmat;
5417   PetscFunctionReturn(0);
5418 }
5419 
5420 /* ----------------------------------------------------------------*/
5421 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5422 {
5423   PetscErrorCode ierr;
5424 
5425   PetscFunctionBegin;
5426   if (scall == MAT_INITIAL_MATRIX) {
5427     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5428     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5429     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5430   }
5431   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5432   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5433   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5434   PetscFunctionReturn(0);
5435 }
5436 
5437 /*MC
5438    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5439 
5440    Options Database Keys:
5441 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5442 
5443   Level: beginner
5444 
5445 .seealso: MatCreateAIJ()
5446 M*/
5447 
5448 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5449 {
5450   Mat_MPIAIJ     *b;
5451   PetscErrorCode ierr;
5452   PetscMPIInt    size;
5453 
5454   PetscFunctionBegin;
5455   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5456 
5457   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5458   B->data       = (void*)b;
5459   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5460   B->assembled  = PETSC_FALSE;
5461   B->insertmode = NOT_SET_VALUES;
5462   b->size       = size;
5463 
5464   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5465 
5466   /* build cache for off array entries formed */
5467   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5468 
5469   b->donotstash  = PETSC_FALSE;
5470   b->colmap      = 0;
5471   b->garray      = 0;
5472   b->roworiented = PETSC_TRUE;
5473 
5474   /* stuff used for matrix vector multiply */
5475   b->lvec  = NULL;
5476   b->Mvctx = NULL;
5477 
5478   /* stuff for MatGetRow() */
5479   b->rowindices   = 0;
5480   b->rowvalues    = 0;
5481   b->getrowactive = PETSC_FALSE;
5482 
5483   /* flexible pointer used in CUSP/CUSPARSE classes */
5484   b->spptr = NULL;
5485 
5486   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5487   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5488   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5489   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5490   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5491   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5492   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5493   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5494   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5495 #if defined(PETSC_HAVE_MKL)
5496   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5497 #endif
5498   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5499   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5500 #if defined(PETSC_HAVE_ELEMENTAL)
5501   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5502 #endif
5503 #if defined(PETSC_HAVE_HYPRE)
5504   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5505 #endif
5506   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5507   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5508   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5509   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5510 #if defined(PETSC_HAVE_HYPRE)
5511   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5512 #endif
5513   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 /*@C
5518      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5519          and "off-diagonal" part of the matrix in CSR format.
5520 
5521    Collective on MPI_Comm
5522 
5523    Input Parameters:
5524 +  comm - MPI communicator
5525 .  m - number of local rows (Cannot be PETSC_DECIDE)
5526 .  n - This value should be the same as the local size used in creating the
5527        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5528        calculated if N is given) For square matrices n is almost always m.
5529 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5530 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5531 .   i - row indices for "diagonal" portion of matrix
5532 .   j - column indices
5533 .   a - matrix values
5534 .   oi - row indices for "off-diagonal" portion of matrix
5535 .   oj - column indices
5536 -   oa - matrix values
5537 
5538    Output Parameter:
5539 .   mat - the matrix
5540 
5541    Level: advanced
5542 
5543    Notes:
5544        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5545        must free the arrays once the matrix has been destroyed and not before.
5546 
5547        The i and j indices are 0 based
5548 
5549        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5550 
5551        This sets local rows and cannot be used to set off-processor values.
5552 
5553        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5554        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5555        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5556        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5557        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5558        communication if it is known that only local entries will be set.
5559 
5560 .keywords: matrix, aij, compressed row, sparse, parallel
5561 
5562 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5563           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5564 @*/
5565 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5566 {
5567   PetscErrorCode ierr;
5568   Mat_MPIAIJ     *maij;
5569 
5570   PetscFunctionBegin;
5571   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5572   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5573   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5574   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5575   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5576   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5577   maij = (Mat_MPIAIJ*) (*mat)->data;
5578 
5579   (*mat)->preallocated = PETSC_TRUE;
5580 
5581   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5582   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5583 
5584   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5585   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5586 
5587   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5588   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5589   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5590   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5591 
5592   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5593   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5594   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5595   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5596   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5597   PetscFunctionReturn(0);
5598 }
5599 
5600 /*
5601     Special version for direct calls from Fortran
5602 */
5603 #include <petsc/private/fortranimpl.h>
5604 
5605 /* Change these macros so can be used in void function */
5606 #undef CHKERRQ
5607 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5608 #undef SETERRQ2
5609 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5610 #undef SETERRQ3
5611 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5612 #undef SETERRQ
5613 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5614 
5615 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5616 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5617 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5618 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5619 #else
5620 #endif
5621 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5622 {
5623   Mat            mat  = *mmat;
5624   PetscInt       m    = *mm, n = *mn;
5625   InsertMode     addv = *maddv;
5626   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5627   PetscScalar    value;
5628   PetscErrorCode ierr;
5629 
5630   MatCheckPreallocated(mat,1);
5631   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5632 
5633 #if defined(PETSC_USE_DEBUG)
5634   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5635 #endif
5636   {
5637     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5638     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5639     PetscBool roworiented = aij->roworiented;
5640 
5641     /* Some Variables required in the macro */
5642     Mat        A                 = aij->A;
5643     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5644     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5645     MatScalar  *aa               = a->a;
5646     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5647     Mat        B                 = aij->B;
5648     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5649     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5650     MatScalar  *ba               = b->a;
5651 
5652     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5653     PetscInt  nonew = a->nonew;
5654     MatScalar *ap1,*ap2;
5655 
5656     PetscFunctionBegin;
5657     for (i=0; i<m; i++) {
5658       if (im[i] < 0) continue;
5659 #if defined(PETSC_USE_DEBUG)
5660       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5661 #endif
5662       if (im[i] >= rstart && im[i] < rend) {
5663         row      = im[i] - rstart;
5664         lastcol1 = -1;
5665         rp1      = aj + ai[row];
5666         ap1      = aa + ai[row];
5667         rmax1    = aimax[row];
5668         nrow1    = ailen[row];
5669         low1     = 0;
5670         high1    = nrow1;
5671         lastcol2 = -1;
5672         rp2      = bj + bi[row];
5673         ap2      = ba + bi[row];
5674         rmax2    = bimax[row];
5675         nrow2    = bilen[row];
5676         low2     = 0;
5677         high2    = nrow2;
5678 
5679         for (j=0; j<n; j++) {
5680           if (roworiented) value = v[i*n+j];
5681           else value = v[i+j*m];
5682           if (in[j] >= cstart && in[j] < cend) {
5683             col = in[j] - cstart;
5684             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5685             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5686           } else if (in[j] < 0) continue;
5687 #if defined(PETSC_USE_DEBUG)
5688           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5689 #endif
5690           else {
5691             if (mat->was_assembled) {
5692               if (!aij->colmap) {
5693                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5694               }
5695 #if defined(PETSC_USE_CTABLE)
5696               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5697               col--;
5698 #else
5699               col = aij->colmap[in[j]] - 1;
5700 #endif
5701               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5702               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5703                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5704                 col  =  in[j];
5705                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5706                 B     = aij->B;
5707                 b     = (Mat_SeqAIJ*)B->data;
5708                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5709                 rp2   = bj + bi[row];
5710                 ap2   = ba + bi[row];
5711                 rmax2 = bimax[row];
5712                 nrow2 = bilen[row];
5713                 low2  = 0;
5714                 high2 = nrow2;
5715                 bm    = aij->B->rmap->n;
5716                 ba    = b->a;
5717               }
5718             } else col = in[j];
5719             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5720           }
5721         }
5722       } else if (!aij->donotstash) {
5723         if (roworiented) {
5724           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5725         } else {
5726           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5727         }
5728       }
5729     }
5730   }
5731   PetscFunctionReturnVoid();
5732 }
5733 
5734